Author: ssmiweve
Date: 2008-08-20 16:00:58 +0200 (Wed, 20 Aug 2008)
New Revision: 6785

Added:
   
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
   
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
   trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml
Modified:
   trunk/generic.sesam/pom.xml
   trunk/generic.sesam/query-evaluation/pom.xml
   trunk/generic.sesam/search-command-control/default/pom.xml
   trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
   trunk/generic.sesam/war/src/main/conf/configuration.properties
   trunk/pom.xml
Log:
Issue SKER4952:  (Solr TokenEvaluator) 

Modified: trunk/generic.sesam/pom.xml
===================================================================
--- trunk/generic.sesam/pom.xml 2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/pom.xml 2008-08-20 14:00:58 UTC (rev 6785)
@@ -186,6 +186,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>localhost</tokenevaluator.host>
                 <tokenevaluator.port>15400</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://localhost:16000/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>localhost:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -234,6 +235,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -278,6 +280,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -327,6 +330,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -371,6 +375,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -415,6 +420,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>
@@ -462,6 +468,7 @@
                 <!-- Token Evaluator -->
                 <tokenevaluator.host>10.16.195.250</tokenevaluator.host>
                 <tokenevaluator.port>15200</tokenevaluator.port>
+                
<tokenevaluator.solr.serverUrl>http://sch-solr-test01.dev.osl.basefarm.net:8080/solr</tokenevaluator.solr.serverUrl>
 
                 
<schibstedsok_remote_service_url>sch-admin01.dev.osl.basefarm.net:1099</schibstedsok_remote_service_url>
                 
<user_service_jndi_name>user-service/UserServiceImpl/remote</user_service_jndi_name>

Modified: trunk/generic.sesam/query-evaluation/pom.xml
===================================================================
--- trunk/generic.sesam/query-evaluation/pom.xml        2008-08-20 13:55:36 UTC 
(rev 6784)
+++ trunk/generic.sesam/query-evaluation/pom.xml        2008-08-20 14:00:58 UTC 
(rev 6785)
@@ -33,5 +33,9 @@
             <artifactId>sesat-site-spi</artifactId>
             <version>${sesat.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.lucene.solr</groupId>
+            <artifactId>solrj</artifactId>
+        </dependency>
     </dependencies>
 </project>

Added: 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
===================================================================
--- 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
                            (rev 0)
+++ 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
    2008-08-20 14:00:58 UTC (rev 6785)
@@ -0,0 +1,325 @@
+/* Copyright (2005-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ *   SESAT is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU Affero General Public License as published 
by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   SESAT is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Affero General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Affero General Public License
+ *   along with SESAT.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package no.sesat.search.query.token;
+
+import java.net.MalformedURLException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import javax.xml.parsers.ParserConfigurationException;
+import no.schibstedsok.commons.ioc.ContextWrapper;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import no.sesat.search.site.Site;
+import no.sesat.search.site.SiteKeyedFactoryInstantiationException;
+import no.sesat.search.site.config.DocumentLoader;
+import no.sesat.search.site.config.SiteConfiguration;
+import org.apache.log4j.Logger;
+import org.apache.log4j.MDC;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ *
+ * @version <tt>$Id$</tt>
+ */
+public final class SolrEvaluatorFactory extends AbstractEvaluatorFactory{
+
+    // Constants -----------------------------------------------------
+
+    private static final String SOLR_EVALUATOR_XMLFILE = "SolrEvaluators.xml";
+
+    private static final ExecutorService EXECUTOR = 
Executors.newCachedThreadPool();
+
+    private static final Logger LOG = 
Logger.getLogger(SolrEvaluatorFactory.class);
+
+    private static final String ERR_FAILED_CONSTRUCTING_EVALUATOR = "Failed to 
construct the evaluator";
+    private static final String ERR_FAILED_INITIALISATION = "Failed reading 
configuration files";
+
+    private static final String TOKEN_HOST_PROPERTY = 
"tokenevaluator.solr.serverUrl";
+
+
+    // Attributes -----------------------------------------------------
+
+    private final Future solrEvaluatorCreator;
+    private SolrTokenEvaluator solrEvaluator;
+    private SolrServer server;
+
+    private static final Map<Site,Map<TokenPredicate,String[]>> LIST_NAMES
+            = new HashMap<Site,Map<TokenPredicate,String[]>>();
+    private static final ReentrantReadWriteLock LIST_NAMES_LOCK = new 
ReentrantReadWriteLock();
+
+    // Constructors -----------------------------------------------------
+
+    /**
+     * Creates the factory for the site held by the given context.
+     *
+     * Reads the Solr server URL from the site configuration property
+     * <tt>tokenevaluator.solr.serverUrl</tt>, loads SolrEvaluators.xml for this site (and any
+     * uninitialised parent sites), and only then schedules the asynchronous construction of
+     * the {@link SolrTokenEvaluator}.
+     *
+     * @param cxt the context providing the site, its configuration and the query string
+     * @throws SiteKeyedFactoryInstantiationException when the server URL is missing or malformed,
+     *          or when the XML parser cannot be configured
+     */
+    public SolrEvaluatorFactory(final Context cxt) throws SiteKeyedFactoryInstantiationException {
+
+        super(cxt);
+
+        try{
+            final Properties props = SiteConfiguration.instanceOf(
+                            ContextWrapper.wrap(SiteConfiguration.Context.class, cxt)).getProperties();
+
+            final String serverUrl = props.getProperty(TOKEN_HOST_PROPERTY);
+            if(null == serverUrl){
+                // fail with a message that names the missing property instead of a null message
+                throw new MalformedURLException("Missing configuration property " + TOKEN_HOST_PROPERTY);
+            }
+            server = new CommonsHttpSolrServer(serverUrl);
+
+        } catch (MalformedURLException ex) {
+
+            throw new SiteKeyedFactoryInstantiationException(ex.getMessage(), ex);
+        }
+
+        try {
+            init(cxt);
+
+        } catch (ParserConfigurationException ex) {
+
+            throw new SiteKeyedFactoryInstantiationException(ex.getMessage(), ex);
+        }
+
+        // Submit only after init(..) has registered this site's list names: the evaluator built by
+        //  the task calls back into usesListName(..), which reads LIST_NAMES for this site.
+        solrEvaluatorCreator = EXECUTOR.submit(new SolrEvaluatorCreator(cxt));
+    }
+
+    // public -----------------------------------------------------
+
+    /**
+     * Finds the evaluator to use for the given token predicate.
+     *
+     * This site's Solr evaluator is used when this factory is responsible for the token.
+     * Otherwise the request is delegated to the parent site's factory, if there is a parent.
+     *
+     * @param token the token predicate to evaluate
+     * @return the evaluator, never null; the always-false evaluator when none is found
+     * @throws EvaluationException when the Solr evaluator could not be obtained
+     */
+    public TokenEvaluator getEvaluator(final TokenPredicate token) throws EvaluationException{
+
+        final Context cxt = getContext();
+
+        TokenEvaluator evaluator = null;
+        if(isResponsibleFor(token)){
+            evaluator = getSolrEvaluator();
+        }
+
+        if(null == evaluator){
+            final Site parentSite = cxt.getSite().getParent();
+            if(null != parentSite){
+                // ask the parent site's factory, reusing everything else from our own context
+                evaluator = instanceOf(ContextWrapper.wrap(
+                        Context.class,
+                        parentSite.getSiteContext(),
+                        cxt
+                    )).getEvaluator(token);
+            }
+        }
+
+        // if we cannot find an evaluator, then always fail evaluation.
+        //  Rather than encourage a NullPointerException
+        return null == evaluator ? TokenEvaluationEngineImpl.ALWAYS_FALSE_EVALUATOR : evaluator;
+    }
+
+    /**
+     * Tells whether this site, or one of its ancestors, configures list names for the token.
+     *
+     * @param token the token predicate to look up
+     * @return true when {@link #getListNames(TokenPredicate)} finds list names for the token
+     */
+    @Override
+    public boolean isResponsibleFor(final TokenPredicate token) {
+
+        final String[] configured = getListNames(token);
+        return configured != null;
+    }
+
+
+    // Package protected ---------------------------------------------
+
+    /** @return the Solr server created in the constructor from the configured server URL */
+    SolrServer getSolrServer(){
+        return this.server;
+    }
+
+    boolean usesListName(final String listname, final String exactname){
+
+        boolean uses = false;
+        try{
+            LIST_NAMES_LOCK.readLock().lock();
+            Site site = getContext().getSite();
+
+            while(!uses && null != site){
+
+                // find listnames used for this token predicate
+                for(String[] listnames : LIST_NAMES.get(site).values()){
+                    uses |= 0 <= Arrays.binarySearch(listnames, listname, 
null);
+                    uses |= null != exactname && 0 <= 
Arrays.binarySearch(listnames, exactname, null);
+                    if(uses){  break; }
+                }
+
+                // prepare to go to parent
+                site = site.getParent();
+            }
+        }finally{
+            LIST_NAMES_LOCK.readLock().unlock();
+        }
+        return uses;
+    }
+
+    String[] getListNames(final TokenPredicate token){
+
+
+
+        String[] listNames = null;
+        try{
+            LIST_NAMES_LOCK.readLock().lock();
+            Site site = getContext().getSite();
+
+            while(null == listNames && null != site){
+
+                // find listnames used for this token predicate
+                listNames = LIST_NAMES.get(site).get(token);
+
+                // prepare to go to parent
+                site = site.getParent();
+            }
+        }finally{
+            LIST_NAMES_LOCK.readLock().unlock();
+        }
+        return listNames;
+    }
+
+    // private -----------------------------------------------------
+
+    /**
+     * Loads SolrEvaluators.xml for the context's site into LIST_NAMES, first initialising the
+     * parent site when it has not been loaded yet. Does nothing for a site already registered.
+     *
+     * Each <tt>list</tt> element contributes one entry: its <tt>token</tt> attribute names the
+     * token predicate (created anonymously when unknown) and its comma separated
+     * <tt>list-name</tt> attribute gives the list names, stored sorted so they can be binary
+     * searched.
+     *
+     * @param cxt the context providing the site and the document loader
+     * @throws ParserConfigurationException when no DocumentBuilder can be created
+     */
+    private static void init(final Context cxt) throws ParserConfigurationException{
+
+        final Site site = cxt.getSite();
+        final Site parent = site.getParent();
+        final boolean parentUninitialised;
+
+        // locks are acquired before their try so the finally never unlocks a lock not held
+        LIST_NAMES_LOCK.readLock().lock();
+        try{
+            // initialise the parent site's configuration
+            parentUninitialised = (null != parent && null == LIST_NAMES.get(parent));
+
+        }finally{
+            LIST_NAMES_LOCK.readLock().unlock();
+        }
+
+        if(parentUninitialised){
+            init(ContextWrapper.wrap(
+                    AbstractEvaluatorFactory.Context.class,
+                    parent.getSiteContext(),
+                    cxt
+                ));
+        }
+
+        LIST_NAMES_LOCK.writeLock().lock();
+        try{
+            if(null == LIST_NAMES.get(site)){
+
+                // built locally and published only once parsing has finished, so a failure
+                //  cannot leave an empty entry behind that would suppress every later attempt
+                final Map<TokenPredicate,String[]> listNames = new HashMap<TokenPredicate,String[]>();
+
+                // initialise this site's configuration
+                final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+                final DocumentBuilder builder = factory.newDocumentBuilder();
+
+                final DocumentLoader loader = cxt.newDocumentLoader(cxt, SOLR_EVALUATOR_XMLFILE, builder);
+                loader.abut();
+
+                LOG.info("Parsing " + SOLR_EVALUATOR_XMLFILE + " started");
+                final Document doc = loader.getDocument();
+
+                if(null != doc && null != doc.getDocumentElement()){
+
+                    final Element root = doc.getDocumentElement();
+                    final NodeList lists = root.getElementsByTagName("list");
+                    for (int i = 0; i < lists.getLength(); ++i) {
+
+                        final Element list = (Element) lists.item(i);
+
+                        final String tokenName = list.getAttribute("token");
+                        // NOTE(review): this literal looks mangled by the list archive's address
+                        //  obfuscation; it is reproduced as found.
+                        LOG.info(" ->[EMAIL PROTECTED]: " + tokenName);
+
+                        TokenPredicate token;
+                        try{
+                            token = TokenPredicateUtility.getTokenPredicate(tokenName);
+
+                        }catch(IllegalArgumentException iae){
+                            LOG.debug(tokenName + " does not exist. Will create it. Underlying exception was " + iae);
+                            token = TokenPredicateUtility.createAnonymousTokenPredicate(tokenName);
+                        }
+
+                        // split(..) never returns null, so the array is used directly
+                        final String[] listNameArr = list.getAttribute("list-name").split(",");
+                        LOG.info(" ->lists: " + list.getAttribute("list-name"));
+
+                        // put the listnames in, sorted so that lookups can binary search them
+                        Arrays.sort(listNameArr, null);
+                        listNames.put(token, listNameArr);
+                    }
+                }
+                LOG.info("Parsing " + SOLR_EVALUATOR_XMLFILE + " finished");
+
+                // create map entry for this site
+                LIST_NAMES.put(site, listNames);
+            }
+        }finally{
+            LIST_NAMES_LOCK.writeLock().unlock();
+        }
+    }
+
+    private SolrTokenEvaluator getSolrEvaluator() throws EvaluationException {
+
+        try {
+            solrEvaluatorCreator.get();
+
+        } catch (InterruptedException ex) {
+            LOG.error(ex.getMessage(), ex);
+            throw new EvaluationException(ex.getMessage(), ex);
+        } catch (ExecutionException ex) {
+            LOG.error(ex.getMessage(), ex);
+            throw new EvaluationException(ex.getMessage(), ex);
+        }
+        if( null == solrEvaluator ){
+            throw new EvaluationException("NPE", new NullPointerException());
+        }
+
+        return solrEvaluator;
+    }
+
+
+    // inner classes -----------------------------------------------------
+
+    /**
+     * Task that constructs the {@link SolrTokenEvaluator} for the enclosing factory and stores it
+     * in the factory's <tt>solrEvaluator</tt> field. On failure the field is left unset and the
+     * error is logged.
+     */
+    private final class SolrEvaluatorCreator implements Runnable{
+
+        private final Context context;
+
+        private SolrEvaluatorCreator(final Context cxt) {
+
+            this.context = cxt;
+        }
+
+        public void run() {
+
+            // tag log output from this worker thread with the context's unique id
+            MDC.put("UNIQUE_ID", context.getUniqueId());
+            try {
+                solrEvaluator = new SolrTokenEvaluator(context, SolrEvaluatorFactory.this);
+
+            } catch (EvaluationException ex) {
+                // keep the cause in the log; getSolrEvaluator() reports the missing evaluator
+                LOG.error(ERR_FAILED_CONSTRUCTING_EVALUATOR, ex);
+
+            } finally {
+                // always clear the id, also on unchecked exceptions, as pool threads are reused
+                MDC.remove("UNIQUE_ID");
+            }
+        }
+
+    }
+
+}


Property changes on: 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrEvaluatorFactory.java
___________________________________________________________________
Name: svn:keywords
   + Id

Added: 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
===================================================================
--- 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
                              (rev 0)
+++ 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
      2008-08-20 14:00:58 UTC (rev 6785)
@@ -0,0 +1,324 @@
+/* Copyright (2005-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ *   SESAT is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU Affero General Public License as published 
by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   SESAT is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Affero General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Affero General Public License
+ *   along with SESAT.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package no.sesat.search.query.token;
+
+import com.opensymphony.oscache.base.NeedsRefreshException;
+import com.opensymphony.oscache.general.GeneralCacheAdministrator;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import no.sesat.search.query.token.AbstractEvaluatorFactory.Context;
+import static no.sesat.search.query.parser.AbstractQueryParser.SKIP_REGEX;
+import static no.sesat.search.query.parser.AbstractQueryParser.OPERATOR_REGEX;
+import org.apache.log4j.Logger;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+
+/**
+ *
+ * @version $Id$
+ */
+public final class SolrTokenEvaluator implements TokenEvaluator{
+
+    // Constants -----------------------------------------------------
+
+    private static final Logger LOG = 
Logger.getLogger(SolrTokenEvaluator.class);
+    private static final Logger DUMP = 
Logger.getLogger("no.sesat.search.Dump");
+
+    /** General properties to regular expressions configured. **/
+    private static final int REG_EXP_OPTIONS = Pattern.CASE_INSENSITIVE | 
Pattern.UNICODE_CASE;
+    // use the lowercase version of TokenPredicate.EXACT_PREFIX
+    private static final String EXACT_PREFIX = 
TokenPredicate.EXACT_PREFIX.toLowerCase();
+
+
+    private static final GeneralCacheAdministrator CACHE_QUERY = new 
GeneralCacheAdministrator();
+    private static final int REFRESH_PERIOD = 60;
+    // smaller than usual as each entry can contain up to 600 values!
+    private static final int CACHE_QUERY_CAPACITY = 100;
+
+    private static final String ERR_QUERY_FAILED = "Querying Solr failed on ";
+    private static final String ERR_FAILED_TO_ENCODE = "Failed to encode query 
string: ";
+
+    // Attributes ----------------------------------------------------
+
+    private final Context context;
+    private SolrEvaluatorFactory factory;
+    private final Map<String, List<TokenMatch>> analysisResult;
+
+    // Static --------------------------------------------------------
+
+    static{
+        CACHE_QUERY.setCacheCapacity(CACHE_QUERY_CAPACITY);
+    }
+
+    // Constructors --------------------------------------------------
+
+    /**
+     *
+     * @param cxt
+     * @param factory
+     * @throws EvaluationException
+     */
+    public SolrTokenEvaluator(final Context cxt, final SolrEvaluatorFactory 
factory) throws EvaluationException{
+
+        context = cxt;
+        this.factory = factory;
+
+        // Remove whitespace (except space itself) and operator characters.
+        analysisResult = query(cleanString(cxt.getQueryString()));
+    }
+
+    // Public --------------------------------------------------------
+
+
+    /**
+     * Evaluates the token predicate against the analysis result of the query.
+     *
+     * @param token the token predicate to evaluate
+     * @param term the term to match; when null the mere presence of one of the token's lists
+     *          in the analysis result is enough
+     * @param query the full query string (not used by this implementation)
+     * @return true when a list behind the token is present and, for a non-null term, one of its
+     *          matches covers the whole cleaned term
+     */
+    public boolean evaluateToken(final TokenPredicate token, final String term, final String query) {
+
+        final String[] listnames = factory.getListNames(token);
+
+        if(null == listnames){
+            LOG.info(context.getSite() + " does not define lists behind the token predicate " + token);
+            return false;
+        }
+
+        for(final String listname : listnames){
+
+            if (!analysisResult.containsKey(listname)) {
+                continue;
+            }
+            if (null == term) {
+                return true;
+            }
+
+            // HACK since DefaultOperatorClause wraps its children in parenthesis
+            final String hackTerm = cleanString(term.replaceAll("\\(|\\)",""));
+
+            for (final TokenMatch candidate : analysisResult.get(listname)) {
+
+                final Matcher m = candidate.getMatcher(hackTerm);
+                // only a match spanning the whole term counts
+                if (m.find() && m.start() == 0 && m.end() == hackTerm.length()) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Collects the values of all matches, in the lists behind the token predicate, that cover
+     * the whole (cleaned) term.
+     *
+     * @param token the token predicate whose lists are inspected
+     * @param term the term to match; it is dereferenced as soon as one of the token's lists is
+     *          present in the analysis result
+     * @return an unmodifiable set of the matching values, empty when nothing matches
+     */
+    public Set<String> getMatchValues(final TokenPredicate token, final String term) {
+
+        final Set<String> found = new HashSet<String>();
+        final String[] listnames = factory.getListNames(token);
+
+        if(null != listnames){
+            for(final String listname : listnames){
+
+                if (!analysisResult.containsKey(listname)) {
+                    continue;
+                }
+
+                // HACK since DefaultOperatorClause wraps its children in parenthesis
+                final String hackTerm = cleanString(term.replaceAll("\\(|\\)",""));
+
+                for (final TokenMatch candidate : analysisResult.get(listname)) {
+
+                    final Matcher m = candidate.getMatcher(hackTerm);
+                    final boolean coversTerm = m.find() && m.start() == 0 && m.end() == hackTerm.length();
+
+                    if (coversTerm) {
+                        found.add(candidate.getValue());
+                    }
+                }
+            }
+        }
+        return Collections.unmodifiableSet(found);
+    }
+
+
+    /**
+     * Tells whether evaluating the predicate depends on the query as a whole.
+     *
+     * @param predicate the token predicate to check
+     * @return true when the predicate's name starts with the upper-cased exact prefix
+     */
+    public boolean isQueryDependant(TokenPredicate predicate) {
+
+        final String exactPrefix = EXACT_PREFIX.toUpperCase();
+        return predicate.name().startsWith(exactPrefix);
+    }
+
+    // Z implementation ----------------------------------------------
+
+    // Y overrides ---------------------------------------------------
+
+    // Package protected ---------------------------------------------
+
+    // Protected -----------------------------------------------------
+
+    // Private -------------------------------------------------------
+
+    /**
+     * Search solr and find out if the given tokens are company, firstname, 
lastname etc
+     * @param query
+     */
+    @SuppressWarnings("unchecked")
+    private Map<String, List<TokenMatch>> query(final String query) throws 
EvaluationException{
+
+        LOG.trace("queryFast( " + query + " )");
+        Map<String, List<TokenMatch>> result = null;
+
+        if (query != null && 0 < query.length()) {
+
+            try{
+                result = (Map<String, List<TokenMatch>>) 
CACHE_QUERY.getFromCache(query, REFRESH_PERIOD);
+
+            } catch (NeedsRefreshException nre) {
+
+                boolean updatedCache = false;
+                result = new HashMap<String,List<TokenMatch>>();
+                String url = null;
+
+                try {
+                    final String token = 
URLEncoder.encode(query.replaceAll("\"", ""), "utf-8");
+
+                    // set up query
+                    final SolrQuery solrQuery = new SolrQuery()
+                            .setQuery(token)
+                            .setRows(Integer.MAX_VALUE);
+
+                    DUMP.info(solrQuery.toString());
+
+                    // query
+                    final QueryResponse response = 
factory.getSolrServer().query(solrQuery);
+                    final SolrDocumentList docs = response.getResults();
+
+
+                    // iterate through docs
+                    for(SolrDocument doc : docs){
+
+                        final String name = (String) doc.getFieldValue("manu");
+                        final String exactname = EXACT_PREFIX + name;
+
+                        // remove words made solely of characters that the 
parser considers whitespace
+                        final String hit = ((String) doc.getFieldValue("name"))
+                                .replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
+
+                        final String synonym = (String) 
doc.getFieldValue("synonym");
+
+                        if(factory.usesListName(name, exactname)){
+
+                            addMatch(name, hit, synonym, query, result);
+
+                            if (hit.equalsIgnoreCase(query.trim())) {
+
+                                addMatch(exactname, hit, synonym, query, 
result);
+                            }
+                        }
+                    }
+
+                    result = Collections.unmodifiableMap(result);
+                    CACHE_QUERY.putInCache(query, result);
+                    updatedCache = true;
+
+                } catch (SolrServerException ex) {
+                    LOG.error(ex.getMessage(), ex);
+                    throw new EvaluationException(ERR_QUERY_FAILED + url, ex);
+
+                } catch (UnsupportedEncodingException ignore) {
+                    LOG.warn(ERR_FAILED_TO_ENCODE + query);
+                    result = (Map<String, 
List<TokenMatch>>)nre.getCacheContent();
+
+                } catch (IOException e1) {
+                    LOG.error(ERR_QUERY_FAILED + url, e1);
+                    result = (Map<String, 
List<TokenMatch>>)nre.getCacheContent();
+                    throw new EvaluationException(ERR_QUERY_FAILED + url, e1);
+
+                }finally{
+                    if(!updatedCache){
+                        CACHE_QUERY.cancelUpdate(query);
+                    }
+                }
+            }
+        } else {
+            result = Collections.emptyMap();
+        }
+        return result;
+    }
+
+    /**
+     * Registers one {@link TokenMatch} under the given list name for every occurrence of the
+     * matched text, as a whole word, in the query.
+     *
+     * @param name the list name the matches are stored under
+     * @param match the matched text; it is used as a regular expression between word boundaries
+     * @param value the value handed to the created TokenMatch
+     * @param query the query string to search in
+     * @param result the map the matches are added to
+     */
+    private static void addMatch(
+            final String name,
+            final String match,
+            final String value,
+            final String query,
+            final Map<String, List<TokenMatch>> result) {
+
+        // NOTE(review): match is not escaped, so regex metacharacters in a list entry change
+        //  what is matched; escaping it here would also need TokenMatch's own pattern checked.
+        final String expr = "\\b" + match + "\\b";
+        final Pattern pattern;
+        try{
+            pattern = Pattern.compile(expr, REG_EXP_OPTIONS);
+
+        }catch(java.util.regex.PatternSyntaxException pse){
+            // one unusable list entry must not abort the evaluation of the whole query
+            LOG.warn("Ignoring match that is not a valid pattern: " + expr, pse);
+            return;
+        }
+
+        // remove words made solely of characters that the parser considers whitespace
+        final String qNew = query.replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
+        final Matcher m = pattern.matcher(qNew);
+
+        while (m.find()) {
+
+            List<TokenMatch> matches = result.get(name);
+            if (null == matches) {
+                matches = new ArrayList<TokenMatch>();
+                result.put(name, matches);
+            }
+
+            matches.add(TokenMatch.instanceOf(name, match, value));
+
+            // warn on every hundredth match registered under the same name
+            if (matches.size() % 100 == 0) {
+                LOG.warn("Pattern: " + pattern.pattern()
+                        + " name: " + name
+                        + " query: " + query
+                        + " match: " + match
+                        + " query2: " + qNew);
+            }
+        }
+    }
+
+    private String cleanString(final String string){
+
+        // Strip out SKIP characters we are not interested in.
+        // Also remove any operator characters. (SEARCH-3883 & SEARCH-3967)
+
+        return string
+                .replaceAll(" ", "xxKEEPWSxx") // Hack to keep spaces. 
multiple spaces always normalised.
+                .replaceAll(SKIP_REGEX, " ")
+                .replaceAll("xxKEEPWSxx", " ") // Hack to keep spaces.
+                .replaceAll(OPERATOR_REGEX, " ")
+                .replaceAll(" +", " "); // normalise
+    }
+
+
+    // Inner classes -------------------------------------------------
+}


Property changes on: 
trunk/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
___________________________________________________________________
Name: svn:keywords
   + Id

Modified: trunk/generic.sesam/search-command-control/default/pom.xml
===================================================================
--- trunk/generic.sesam/search-command-control/default/pom.xml  2008-08-20 
13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/search-command-control/default/pom.xml  2008-08-20 
14:00:58 UTC (rev 6785)
@@ -122,7 +122,6 @@
         <dependency>
             <groupId>org.apache.lucene.solr</groupId>
             <artifactId>solrj</artifactId>
-            <version>1.3-SNAPSHOT</version>
         </dependency>
         
         <!-- testin -->

Added: trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml
===================================================================
--- trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml                    
        (rev 0)
+++ trunk/generic.sesam/war/src/main/conf/SolrEvaluators.xml    2008-08-20 
14:00:58 UTC (rev 6785)
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/VeryFastEvaluators.dtd">
+<!--
+    Document   : SolrEvaluators.xml
+    Created on : June 9, 2006, 1:26 PM
+    Author     : mick
+    Description:
+        Maps token predicates to the Solr list names that back them (read by SolrEvaluatorFactory).
+-->
+
+<fast-evaluators>
+    <list token="ENGLISHWORDS" list-name="common_english"/>
+ </fast-evaluators>

Modified: trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml
===================================================================
--- trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml        
2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/war/src/main/conf/VeryFastEvaluators.xml        
2008-08-20 14:00:58 UTC (rev 6785)
@@ -1,13 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/VeryFastEvaluators.dtd">
-<!--
-    Document   : VeryFastEvaluators.xml
-    Created on : June 9, 2006, 1:26 PM
-    Author     : mick
-    Description:
-        Purpose of the document follows.
--->
+<!DOCTYPE fast-evaluators SYSTEM "http://sesam.no/dtds/SolrEvaluators.dtd">
 
-<fast-evaluators>
+<solr-evaluators>
     <list token="ENGLISHWORDS" list-name="common_english"/>
- </fast-evaluators>
+</solr-evaluators>

Modified: trunk/generic.sesam/war/src/main/conf/configuration.properties
===================================================================
--- trunk/generic.sesam/war/src/main/conf/configuration.properties      
2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/generic.sesam/war/src/main/conf/configuration.properties      
2008-08-20 14:00:58 UTC (rev 6785)
@@ -26,6 +26,9 @@
 [EMAIL PROTECTED]@
 [EMAIL PROTECTED]@
 
+# Solr Token Evaluator
+tokenevaluator.solr.serverUrl=@tokenevaluator.solr.serverUrl@
+
 # Relevant Query Suggestions
 [EMAIL PROTECTED]@
 

Modified: trunk/pom.xml
===================================================================
--- trunk/pom.xml       2008-08-20 13:55:36 UTC (rev 6784)
+++ trunk/pom.xml       2008-08-20 14:00:58 UTC (rev 6785)
@@ -559,6 +559,11 @@
                 <type>jar</type>
                 <classifier>jdk15</classifier>
             </dependency>
+            <dependency>
+                <groupId>org.apache.lucene.solr</groupId>
+                <artifactId>solrj</artifactId>
+                <version>1.3-SNAPSHOT</version>
+            </dependency>
         </dependencies>
     </dependencyManagement>
     <dependencies>

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to