Author: ssmiweve
Date: 2008-08-20 16:00:58 +0200 (Wed, 20 Aug 2008)
New Revision: 6785
Added:
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml
Modified:
trunk/generic.sesam/pom.xml
trunk/generic.sesam/query-evaluation/pom.xml
trunk/generic.sesam/search-command-control/default/pom.xml
trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
trunk/generic.sesam/war/src/main/conf/configuration.properties
trunk/pom.xml
Log:
Issue SKER4952: (Solr TokenEvaluator)
Modified: trunk/generic.sesam/pom.xml
===================================================================
--- trunk/generic.sesam/pom.xml 2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/pom.xml 2008-08-20 14:00:58 UTC (rev 6785)
@@ -186,6 +186,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>localhost</tokenevaluator.host>
<tokenevaluator.port>15400</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://localhost:16000/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>localhost:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -234,6 +235,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -278,6 +280,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -327,6 +330,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -371,6 +375,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -415,6 +420,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -462,6 +468,7 @@
<!-- Token Evaluator -->
<tokenevaluator.host>10.16.195.250</tokenevaluator.host>
<tokenevaluator.port>15200</tokenevaluator.port>
+
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
Modified: trunk/generic.sesam/query-evaluation/pom.xml
===================================================================
--- trunk/generic.sesam/query-evaluation/pom.xml 2008-08-20 13:55:36 UTC
(rev 6784)
+++ trunk/generic.sesam/query-evaluation/pom.xml 2008-08-20 14:00:58 UTC
(rev 6785)
@@ -33,5 +33,9 @@
<artifactId>sesat-site-spi</artifactId>
<version>${sesat.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.lucene.solr</groupId>
+ <artifactId>solrj</artifactId>
+ </dependency>
</dependencies>
</project>
Added:
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
===================================================================
---
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
(rev 0)
+++
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
2008-08-20 14:00:58 UTC (rev 6785)
@@ -0,0 +1,325 @@
+/* Copyright (2005-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ * SESAT is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SESAT is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with SESAT. If not, see <http://www.gnu.org/licenses/>.
+ */
+package no.sesat.search.query.token;
+
+import java.net.MalformedURLException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import javax.xml.parsers.ParserConfigurationException;
+import no.schibstedsok.commons.ioc.ContextWrapper;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import no.sesat.search.site.Site;
+import no.sesat.search.site.SiteKeyedFactoryInstantiationException;
+import no.sesat.search.site.config.DocumentLoader;
+import no.sesat.search.site.config.SiteConfiguration;
+import org.apache.log4j.Logger;
+import org.apache.log4j.MDC;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
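+/**
+ * Factory for {@link SolrTokenEvaluator} instances.
+ * The solr server url is read from the site configuration property "tokenevaluator.solr.serverUrl",
+ * and the mapping from token predicates to solr list names is read, per site, from SolrEvaluators.xml.
+ * The evaluator itself is constructed on a background thread; tokens this site has no lists for
+ * are delegated to the parent site's factory.
+ *
+ * @version <tt>$Id$</tt>
+ */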
+public final class SolrEvaluatorFactory extends AbstractEvaluatorFactory{
+
+ // Constants -----------------------------------------------------
+
+ private static final String SOLR_EVALUATOR_XMLFILE = "SolrEvaluators.xml";
+
+ private static final ExecutorService EXECUTOR =
Executors.newCachedThreadPool();
+
+ private static final Logger LOG =
Logger.getLogger(SolrEvaluatorFactory.class);
+
+ private static final String ERR_FAILED_CONSTRUCTING_EVALUATOR = "Failed to
construct the evaluator";
+ private static final String ERR_FAILED_INITIALISATION = "Failed reading
configuration files";
+
+ private static final String TOKEN_HOST_PROPERTY =
"tokenevaluator.solr.serverUrl";
+
+
+ // Attributes -----------------------------------------------------
+
+ private final Future solrEvaluatorCreator;
+ private SolrTokenEvaluator solrEvaluator;
+ private SolrServer server;
+
+ private static final Map<Site,Map<TokenPredicate,String[]>> LIST_NAMES
+ = new HashMap<Site,Map<TokenPredicate,String[]>>();
+ private static final ReentrantReadWriteLock LIST_NAMES_LOCK = new
ReentrantReadWriteLock();
+
+ // Constructors -----------------------------------------------------
+
+ /**
+  * Creates the factory for the site of the given context.
+  * Connects to the solr server named by the site configuration property "tokenevaluator.solr.serverUrl",
+  * loads the token predicate to list-name mappings for the site (and its ancestors),
+  * and finally starts construction of the evaluator on a background thread.
+  *
+  * @param cxt the context supplying the site, its configuration and the query string
+  * @throws SiteKeyedFactoryInstantiationException if the configured server url is malformed
+  *         or the XML parser used to read the list-name mappings cannot be configured
+  */
+ public SolrEvaluatorFactory(final Context cxt) throws SiteKeyedFactoryInstantiationException {
+
+     super(cxt);
+
+     try{
+         final Properties props = SiteConfiguration.instanceOf(
+                 ContextWrapper.wrap(SiteConfiguration.Context.class, cxt)).getProperties();
+
+         final String serverUrl = props.getProperty(TOKEN_HOST_PROPERTY);
+         server = new CommonsHttpSolrServer(serverUrl);
+
+         // The list names must be loaded BEFORE the evaluator is created in the background:
+         //  the evaluator's constructor queries solr and calls back into usesListName(..),
+         //  which reads the per-site entry that init(..) creates.
+         init(cxt);
+
+     } catch (MalformedURLException ex) {
+
+         throw new SiteKeyedFactoryInstantiationException(ex.getMessage(), ex);
+
+     } catch (ParserConfigurationException ex) {
+
+         throw new SiteKeyedFactoryInstantiationException(ex.getMessage(), ex);
+     }
+
+     solrEvaluatorCreator = EXECUTOR.submit(new SolrEvaluatorCreator(cxt));
+ }
+
+ // public -----------------------------------------------------
+
+ /**
+  * Finds the evaluator to use for the given token predicate.
+  * This factory's solr evaluator is used when this site (or an ancestor) maps the token to list names;
+  * otherwise the parent site's factory, if there is a parent, is asked.
+  *
+  * @param token the token predicate to evaluate
+  * @return the evaluator, never null: TokenEvaluationEngineImpl.ALWAYS_FALSE_EVALUATOR when none was found
+  * @throws EvaluationException if the solr evaluator could not be obtained
+  */
+ public TokenEvaluator getEvaluator(final TokenPredicate token) throws EvaluationException{
+
+     final Context cxt = getContext();
+
+     TokenEvaluator evaluator = null;
+     if(isResponsibleFor(token)){
+         evaluator = getSolrEvaluator();
+     }
+
+     if(null == evaluator){
+         final Site parentSite = cxt.getSite().getParent();
+         if(null != parentSite){
+             // not ours: let the parent site's factory have a go
+             final Context parentCxt = ContextWrapper.wrap(Context.class, parentSite.getSiteContext(), cxt);
+             evaluator = instanceOf(parentCxt).getEvaluator(token);
+         }
+     }
+
+     // if we cannot find an evaluator, then always fail evaluation,
+     //  rather than encourage a NullPointerException
+     return null != evaluator ? evaluator : TokenEvaluationEngineImpl.ALWAYS_FALSE_EVALUATOR;
+ }
+
+ /**
+  * Whether this factory handles the given token predicate,
+  * that is whether list names are configured for it (see getListNames).
+  *
+  * @param token the token predicate in question
+  * @return true when list names are found for the token
+  */
+ @Override
+ public boolean isResponsibleFor(final TokenPredicate token) {
+
+     final String[] configuredLists = getListNames(token);
+     return configuredLists != null;
+ }
+
+
+ // Package protected ---------------------------------------------
+
+ /**
+  * The solr server this factory queries.
+  *
+  * @return the server created in the constructor from the "tokenevaluator.solr.serverUrl" property
+  */
+ SolrServer getSolrServer(){
+ return server;
+ }
+
+ boolean usesListName(final String listname, final String exactname){
+
+ boolean uses = false;
+ try{
+ LIST_NAMES_LOCK.readLock().lock();
+ Site site = getContext().getSite();
+
+ while(!uses && null != site){
+
+ // find listnames used for this token predicate
+ for(String[] listnames : LIST_NAMES.get(site).values()){
+ uses |= 0 <= Arrays.binarySearch(listnames, listname,
null);
+ uses |= null != exactname && 0 <=
Arrays.binarySearch(listnames, exactname, null);
+ if(uses){ break; }
+ }
+
+ // prepare to go to parent
+ site = site.getParent();
+ }
+ }finally{
+ LIST_NAMES_LOCK.readLock().unlock();
+ }
+ return uses;
+ }
+
+ String[] getListNames(final TokenPredicate token){
+
+
+
+ String[] listNames = null;
+ try{
+ LIST_NAMES_LOCK.readLock().lock();
+ Site site = getContext().getSite();
+
+ while(null == listNames && null != site){
+
+ // find listnames used for this token predicate
+ listNames = LIST_NAMES.get(site).get(token);
+
+ // prepare to go to parent
+ site = site.getParent();
+ }
+ }finally{
+ LIST_NAMES_LOCK.readLock().unlock();
+ }
+ return listNames;
+ }
+
+ // private -----------------------------------------------------
+
+ /**
+  * Loads the token predicate to list-name mappings from SolrEvaluators.xml for the context's site.
+  * The parent site, when it has no mappings loaded yet, is (recursively) loaded first.
+  * A site that already has its mappings loaded is left untouched.
+  * Each "list" element supplies a "token" attribute and a comma separated "list-name" attribute;
+  * unknown token names are registered as anonymous token predicates.
+  *
+  * @param cxt the context supplying the site and the document loader
+  * @throws ParserConfigurationException if the XML document builder cannot be created
+  */
+ private static void init(final Context cxt) throws ParserConfigurationException{
+
+     final Site site = cxt.getSite();
+     final Site parent = site.getParent();
+     final boolean parentUninitialised;
+
+     // locks are acquired outside the try blocks so that a failed lock() is never followed by an unlock()
+     LIST_NAMES_LOCK.readLock().lock();
+     try{
+         // does the parent site's configuration still need initialising?
+         parentUninitialised = (null != parent && null == LIST_NAMES.get(parent));
+
+     }finally{
+         LIST_NAMES_LOCK.readLock().unlock();
+     }
+
+     if(parentUninitialised){
+         init(ContextWrapper.wrap(
+                 AbstractEvaluatorFactory.Context.class,
+                 parent.getSiteContext(),
+                 cxt
+             ));
+     }
+
+     LIST_NAMES_LOCK.writeLock().lock();
+     try{
+         // re-check under the write lock, another thread may have initialised the site in the meantime
+         if(null == LIST_NAMES.get(site)){
+
+             // initialise this site's configuration
+             final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+             final DocumentBuilder builder = factory.newDocumentBuilder();
+
+             final DocumentLoader loader = cxt.newDocumentLoader(cxt, SOLR_EVALUATOR_XMLFILE, builder);
+             loader.abut();
+
+             LOG.info("Parsing " + SOLR_EVALUATOR_XMLFILE + " started");
+             final Map<TokenPredicate,String[]> listNames = new HashMap<TokenPredicate,String[]>();
+             final Document doc = loader.getDocument();
+
+             if(null != doc && null != doc.getDocumentElement()){
+
+                 final Element root = doc.getDocumentElement();
+                 final NodeList lists = root.getElementsByTagName("list");
+                 for (int i = 0; i < lists.getLength(); ++i) {
+
+                     final Element list = (Element) lists.item(i);
+
+                     final String tokenName = list.getAttribute("token");
+                     LOG.info(" ->[EMAIL PROTECTED]: " + tokenName);
+
+                     TokenPredicate token;
+                     try{
+                         token = TokenPredicateUtility.getTokenPredicate(tokenName);
+
+                     }catch(IllegalArgumentException iae){
+                         LOG.debug(tokenName + " does not exist. Will create it. Underlying exception was " + iae);
+                         token = TokenPredicateUtility.createAnonymousTokenPredicate(tokenName);
+                     }
+
+                     // split(..) never returns null, and solr list names are used exactly as configured
+                     final String[] listNameArr = list.getAttribute("list-name").split(",");
+                     LOG.info(" ->lists: " + list.getAttribute("list-name"));
+
+                     // keep the list names sorted, lookups use binary search
+                     Arrays.sort(listNameArr, null);
+                     listNames.put(token, listNameArr);
+                 }
+             }
+             LOG.info("Parsing " + SOLR_EVALUATOR_XMLFILE + " finished");
+
+             // register the site only once loading has completed:
+             //  a failure above must not leave the site marked as initialised with an empty mapping
+             LIST_NAMES.put(site, listNames);
+         }
+     }finally{
+         LIST_NAMES_LOCK.writeLock().unlock();
+     }
+ }
+
+ private SolrTokenEvaluator getSolrEvaluator() throws EvaluationException {
+
+ try {
+ solrEvaluatorCreator.get();
+
+ } catch (InterruptedException ex) {
+ LOG.error(ex.getMessage(), ex);
+ throw new EvaluationException(ex.getMessage(), ex);
+ } catch (ExecutionException ex) {
+ LOG.error(ex.getMessage(), ex);
+ throw new EvaluationException(ex.getMessage(), ex);
+ }
+ if( null == solrEvaluator ){
+ throw new EvaluationException("NPE", new NullPointerException());
+ }
+
+ return solrEvaluator;
+ }
+
+
+ // inner classes -----------------------------------------------------
+
+ /**
+  * Task constructing the enclosing factory's SolrTokenEvaluator.
+  * It is submitted to the shared executor by the factory's constructor.
+  * A failed construction is logged and leaves the factory's evaluator unset.
+  */
+ private final class SolrEvaluatorCreator implements Runnable{
+
+     private final Context context;
+
+     private SolrEvaluatorCreator(final Context cxt) {
+
+         this.context = cxt;
+     }
+
+     public void run() {
+
+         // tag log statements made on this (pooled) thread with the request's id
+         MDC.put("UNIQUE_ID", context.getUniqueId());
+         try {
+             solrEvaluator = new SolrTokenEvaluator(context, SolrEvaluatorFactory.this);
+
+         } catch (EvaluationException ex) {
+             // include the exception, otherwise the cause of the failure is lost
+             LOG.error(ERR_FAILED_CONSTRUCTING_EVALUATOR, ex);
+
+         } finally {
+             // always clean up: the thread returns to the pool and must not keep a stale id
+             MDC.remove("UNIQUE_ID");
+         }
+     }
+
+ }
+
+}
Property changes on:
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added:
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
===================================================================
---
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
(rev 0)
+++
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
2008-08-20 14:00:58 UTC (rev 6785)
@@ -0,0 +1,324 @@
+/* Copyright (2005-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ * SESAT is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SESAT is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with SESAT. If not, see <http://www.gnu.org/licenses/>.
+ */
+package no.sesat.search.query.token;
+
+import com.opensymphony.oscache.base.NeedsRefreshException;
+import com.opensymphony.oscache.general.GeneralCacheAdministrator;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import no.sesat.search.query.token.AbstractEvaluatorFactory.Context;
+import static no.sesat.search.query.parser.AbstractQueryParser.SKIP_REGEX;
+import static no.sesat.search.query.parser.AbstractQueryParser.OPERATOR_REGEX;
+import org.apache.log4j.Logger;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+
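+/**
+ * TokenEvaluator backed by a solr index.
+ * On construction the context's (cleaned) query string is sent to solr, and the matching documents are
+ * grouped by list name. Token predicates are then evaluated against those matches, using the
+ * token predicate to list-name mappings held by the {@link SolrEvaluatorFactory}.
+ * Query results are cached per query string.
+ *
+ * @version $Id$
+ */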
+public final class SolrTokenEvaluator implements TokenEvaluator{
+
+ // Constants -----------------------------------------------------
+
+ private static final Logger LOG =
Logger.getLogger(SolrTokenEvaluator.class);
+ private static final Logger DUMP =
Logger.getLogger("no.sesat.search.Dump");
+
+ /** General properties to regular expressions configured. **/
+ private static final int REG_EXP_OPTIONS = Pattern.CASE_INSENSITIVE |
Pattern.UNICODE_CASE;
+ // use the lowercase version of TokenPredicate.EXACT_PREFIX
+ private static final String EXACT_PREFIX =
TokenPredicate.EXACT_PREFIX.toLowerCase();
+
+
+ private static final GeneralCacheAdministrator CACHE_QUERY = new
GeneralCacheAdministrator();
+ private static final int REFRESH_PERIOD = 60;
+ // smaller than usual as each entry can contain up to 600 values!
+ private static final int CACHE_QUERY_CAPACITY = 100;
+
+ private static final String ERR_QUERY_FAILED = "Querying Solr failed on ";
+ private static final String ERR_FAILED_TO_ENCODE = "Failed to encode query
string: ";
+
+ // Attributes ----------------------------------------------------
+
+ private final Context context;
+ private SolrEvaluatorFactory factory;
+ private final Map<String, List<TokenMatch>> analysisResult;
+
+ // Static --------------------------------------------------------
+
+ static{
+ CACHE_QUERY.setCacheCapacity(CACHE_QUERY_CAPACITY);
+ }
+
+ // Constructors --------------------------------------------------
+
+ /**
+ * Creates the evaluator and immediately performs the solr lookup for the context's query string.
+ * The query string is cleaned (see cleanString) before it is sent, and the matches found
+ * are kept for the lifetime of this evaluator.
+ *
+ * @param cxt the context supplying the query string (and the site, used for logging)
+ * @param factory the factory supplying the solr server and the token to list-name mappings
+ * @throws EvaluationException if querying solr fails
+ */
+ public SolrTokenEvaluator(final Context cxt, final SolrEvaluatorFactory
factory) throws EvaluationException{
+
+ context = cxt;
+ this.factory = factory;
+
+ // Remove whitespace (except space itself) and operator characters.
+ analysisResult = query(cleanString(cxt.getQueryString()));
+ }
+
+ // Public --------------------------------------------------------
+
+
+ /**
+  * Evaluates the token predicate against the matches found for the query.
+  * With a null term it is enough that any of the token's lists had a match at all;
+  * otherwise some match from one of the token's lists must cover the whole (cleaned) term.
+  *
+  * @param token the token predicate to evaluate
+  * @param term the term to evaluate, or null to evaluate against the query as a whole
+  * @param query not used by this implementation
+  * @return true if the token predicate holds
+  */
+ public boolean evaluateToken(final TokenPredicate token, final String term, final String query) {
+
+     final String[] listnames = factory.getListNames(token);
+
+     if(null == listnames){
+         LOG.info(context.getSite() + " does not define lists behind the token predicate " + token);
+         return false;
+     }
+
+     for(final String listname : listnames){
+
+         if(!analysisResult.containsKey(listname)){
+             continue;
+         }
+         if(null == term){
+             return true;
+         }
+
+         // HACK since DefaultOperatorClause wraps its children in parenthesis
+         final String hackTerm = cleanString(term.replaceAll("\\(|\\)",""));
+
+         for(final TokenMatch occurance : analysisResult.get(listname)){
+
+             final Matcher m = occurance.getMatcher(hackTerm);
+             if(m.find() && m.start() == 0 && m.end() == hackTerm.length()){
+                 return true;
+             }
+         }
+     }
+     return false;
+ }
+
+
+ /**
+  * Collects the values of every match, from the lists behind the given token predicate,
+  * that covers the whole (cleaned) term.
+  *
+  * @param token the token predicate whose lists are consulted
+  * @param term the term the matches must cover
+  * @return an unmodifiable set of the match values, empty when nothing matched
+  */
+ public Set<String> getMatchValues(final TokenPredicate token, final String term) {
+
+     final Set<String> values = new HashSet<String>();
+     final String[] listnames = factory.getListNames(token);
+
+     if(null != listnames){
+         for(final String listname : listnames){
+
+             if(!analysisResult.containsKey(listname)){
+                 continue;
+             }
+
+             // HACK since DefaultOperatorClause wraps its children in parenthesis
+             final String hackTerm = cleanString(term.replaceAll("\\(|\\)",""));
+
+             for(final TokenMatch occurance : analysisResult.get(listname)){
+
+                 final Matcher m = occurance.getMatcher(hackTerm);
+                 final boolean coversTerm = m.find() && m.start() == 0 && m.end() == hackTerm.length();
+
+                 if(coversTerm){
+                     values.add(occurance.getValue());
+                 }
+             }
+         }
+     }
+     return Collections.unmodifiableSet(values);
+ }
+
+
+ /**
+  * Whether the predicate's name starts with the (upper-cased) exact prefix.
+  *
+  * @param predicate the token predicate in question
+  * @return true when the predicate's name carries the exact prefix
+  */
+ public boolean isQueryDependant(TokenPredicate predicate) {
+
+     final String predicateName = predicate.name();
+     final String exactPrefix = EXACT_PREFIX.toUpperCase();
+     return predicateName.startsWith(exactPrefix);
+ }
+
+ // Z implementation ----------------------------------------------
+
+ // Y overrides ---------------------------------------------------
+
+ // Package protected ---------------------------------------------
+
+ // Protected -----------------------------------------------------
+
+ // Private -------------------------------------------------------
+
+ /**
+  * Searches solr for the given query and groups the hits by the list name they belong to
+  * (company, firstname, lastname etc), keeping only the lists the factory says are in use.
+  * Results are cached per query string; a cached result is reused until the cache reports it needs a refresh.
+  *
+  * @param query the cleaned query string, may be null or empty
+  * @return an unmodifiable map from list name to the matches found, never null.
+  *         Empty for a null or empty query.
+  * @throws EvaluationException if the request to solr fails
+  */
+ @SuppressWarnings("unchecked")
+ private Map<String, List<TokenMatch>> query(final String query) throws EvaluationException{
+
+     LOG.trace("query( " + query + " )");
+
+     if (null == query || 0 == query.length()) {
+         return Collections.emptyMap();
+     }
+
+     Map<String, List<TokenMatch>> result;
+     try{
+         result = (Map<String, List<TokenMatch>>) CACHE_QUERY.getFromCache(query, REFRESH_PERIOD);
+
+     } catch (NeedsRefreshException nre) {
+
+         // the cache expects either putInCache(..) or cancelUpdate(..) after a NeedsRefreshException
+         boolean updatedCache = false;
+
+         try {
+             final Map<String, List<TokenMatch>> fresh = new HashMap<String,List<TokenMatch>>();
+
+             // NOTE(review): solrj presumably encodes request parameters itself,
+             //  in which case this pre-encoding double-encodes the query -- confirm before changing.
+             final String token = URLEncoder.encode(query.replaceAll("\"", ""), "utf-8");
+
+             // set up query
+             final SolrQuery solrQuery = new SolrQuery()
+                     .setQuery(token)
+                     .setRows(Integer.MAX_VALUE);
+
+             DUMP.info(solrQuery.toString());
+
+             // query
+             final QueryResponse response = factory.getSolrServer().query(solrQuery);
+             final SolrDocumentList docs = response.getResults();
+
+             // iterate through docs
+             for(SolrDocument doc : docs){
+
+                 final String name = (String) doc.getFieldValue("manu");
+                 final String rawHit = (String) doc.getFieldValue("name");
+
+                 if(null == name || null == rawHit){
+                     // a document missing either field cannot produce a match, skip it rather than fail
+                     continue;
+                 }
+
+                 final String exactname = EXACT_PREFIX + name;
+
+                 // remove words made solely of characters that the parser considers whitespace
+                 final String hit = rawHit.replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
+
+                 final String synonym = (String) doc.getFieldValue("synonym");
+
+                 if(factory.usesListName(name, exactname)){
+
+                     addMatch(name, hit, synonym, query, fresh);
+
+                     if (hit.equalsIgnoreCase(query.trim())) {
+
+                         addMatch(exactname, hit, synonym, query, fresh);
+                     }
+                 }
+             }
+
+             result = Collections.unmodifiableMap(fresh);
+             CACHE_QUERY.putInCache(query, result);
+             updatedCache = true;
+
+         } catch (SolrServerException ex) {
+             LOG.error(ex.getMessage(), ex);
+             throw new EvaluationException(ERR_QUERY_FAILED + query, ex);
+
+         } catch (UnsupportedEncodingException ignore) {
+             LOG.warn(ERR_FAILED_TO_ENCODE + query);
+
+             // fall back to the stale cache content, or to no matches at all when there is none.
+             //  returning null here would only postpone the failure to the first evaluation.
+             final Map<String, List<TokenMatch>> stale = (Map<String, List<TokenMatch>>) nre.getCacheContent();
+             result = null != stale ? stale : Collections.<String, List<TokenMatch>>emptyMap();
+
+         } catch (IOException e1) {
+             LOG.error(ERR_QUERY_FAILED + query, e1);
+             throw new EvaluationException(ERR_QUERY_FAILED + query, e1);
+
+         }finally{
+             if(!updatedCache){
+                 CACHE_QUERY.cancelUpdate(query);
+             }
+         }
+     }
+     return result;
+ }
+
+ /**
+  * Records a token match under the given list name, once for every occurrence of the match text in the query.
+  * Occurrences are found case insensitively and must sit on word boundaries.
+  *
+  * @param name the list name to record the match under
+  * @param match the text of the hit, treated as literal text
+  * @param value the value to associate with the match
+  * @param query the query string to look for occurrences in
+  * @param result the map, keyed by list name, the matches are added to
+  */
+ private static void addMatch(
+         final String name,
+         final String match,
+         final String value,
+         final String query,
+         final Map<String, List<TokenMatch>> result) {
+
+     // the hit is data from the index, not a regular expression:
+     //  quote it so that any metacharacters in it neither break compilation nor alter the matching
+     final String expr = "\\b" + Pattern.quote(match) + "\\b";
+     final Pattern pattern = Pattern.compile(expr, REG_EXP_OPTIONS);
+
+     // remove words made solely of characters that the parser considers whitespace
+     final String qNew = query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
+     final Matcher m = pattern.matcher(qNew);
+
+     while (m.find()) {
+         final TokenMatch tknMatch = TokenMatch.instanceOf(name, match, value);
+
+         List<TokenMatch> matches = result.get(name);
+         if (null == matches) {
+             matches = new ArrayList<TokenMatch>();
+             result.put(name, matches);
+         }
+
+         matches.add(tknMatch);
+
+         // an unusually large number of matches for one list is worth a warning
+         if (matches.size() % 100 == 0) {
+             LOG.warn("Pattern: " + pattern.pattern()
+                     + " name: " + name
+                     + " query: " + query
+                     + " match: " + match
+                     + " query2: " + qNew);
+         }
+     }
+ }
+
+ private String cleanString(final String string){
+
+ // Strip out SKIP characters we are not interested in.
+ // Also remove any operator characters. (SEARCH-3883 & SEARCH-3967)
+
+ return string
+ .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces.
multiple spaces always normalised.
+ .replaceAll(SKIP_REGEX, " ")
+ .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
+ .replaceAll(OPERATOR_REGEX, " ")
+ .replaceAll(" +", " "); // normalise
+ }
+
+
+ // Inner classes -------------------------------------------------
+}
Property changes on:
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
___________________________________________________________________
Name: svn:keywords
+ Id
Modified: trunk/generic.sesam/search-command-control/default/pom.xml
===================================================================
--- trunk/generic.sesam/search-command-control/default/pom.xml 2008-08-20
13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/search-command-control/default/pom.xml 2008-08-20
14:00:58 UTC (rev 6785)
@@ -122,7 +122,6 @@
<dependency>
<groupId>org.apache.lucene.solr</groupId>
<artifactId>solrj</artifactId>
- <version>1.3-SNAPSHOT</version>
</dependency>
<!-- testin -->
Added: trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml
===================================================================
--- trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml
(rev 0)
+++ trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml 2008-08-20
14:00:58 UTC (rev 6785)
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/VeryFastEvaluators.dtd">
+<!--
+ Document : SolrEvaluators.xml
+ Created on : June 9, 2006, 1:26 PM
+ Author : mick
+ Description:
+ Purpose of the document follows.
+-->
+
+<fast-evaluators>
+ <list token="ENGLISHWORDS" list-name="common_english"/>
+ </fast-evaluators>
Modified: trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
===================================================================
--- trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
2008-08-20 14:00:58 UTC (rev 6785)
@@ -1,13 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/VeryFastEvaluators.dtd">
-<!--
- Document : VeryFastEvaluators.xml
- Created on : June 9, 2006, 1:26 PM
- Author : mick
- Description:
- Purpose of the document follows.
--->
+<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/SolrEvaluators.dtd">
-<fast-evaluators>
+<solr-evaluators>
<list token="ENGLISHWORDS" list-name="common_english"/>
- </fast-evaluators>
+</solr-evaluators>
Modified: trunk/generic.sesam/war/src/main/conf/configuration.properties
===================================================================
--- trunk/generic.sesam/war/src/main/conf/configuration.properties
2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/war/src/main/conf/configuration.properties
2008-08-20 14:00:58 UTC (rev 6785)
@@ -26,6 +26,9 @@
[EMAIL PROTECTED]@
[EMAIL PROTECTED]@
+# Solr Token Evaluator
[EMAIL PROTECTED]@
+
# Relevant Query Suggestions
[EMAIL PROTECTED]@
Modified: trunk/pom.xml
===================================================================
--- trunk/pom.xml 2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/pom.xml 2008-08-20 14:00:58 UTC (rev 6785)
@@ -559,6 +559,11 @@
<type>jar</type>
<classifier>jdk15</classifier>
</dependency>
+ <dependency>
+ <groupId>org.apache.lucene.solr</groupId>
+ <artifactId>solrj</artifactId>
+ <version>1.3-SNAPSHOT</version>
+ </dependency>
</dependencies>
</dependencyManagement>
<dependencies>
_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits