Author: sshafroi
Date: 2008-12-15 17:09:01 +0100 (Mon, 15 Dec 2008)
New Revision: 7103

Modified:
   
branches/2.18/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
Log:
SEARCH-5262 Solr indexes to handle 2.18 query matching traffic

Reduce the number of results we ask for from Integer.MAX_VALUE to 25. If there
are more results than 25, ask for the rest in a new query. This improves speed
dramatically, from 7000ms to 2ms in some cases. I haven't measured this compared
to the fast predicates. We also used to get OutOfMemory errors for these
queries, but these have gone now.


Modified: 
branches/2.18/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
===================================================================
--- 
branches/2.18/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
      2008-12-15 15:56:01 UTC (rev 7102)
+++ 
branches/2.18/generic.sesam/query-evaluation/src/main/java/no/sesat/search/query/token/SolrTokenEvaluator.java
      2008-12-15 16:09:01 UTC (rev 7103)
@@ -18,9 +18,6 @@
 
 import com.opensymphony.oscache.base.NeedsRefreshException;
 import com.opensymphony.oscache.general.GeneralCacheAdministrator;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -62,6 +59,7 @@
     private static final int REFRESH_PERIOD = 60;
     // smaller than usual as each entry can contain up to 600 values!
     private static final int CACHE_QUERY_CAPACITY = 100;
+    private static final int INITIAL_ROWS_TO_FETCH = 25;
 
     private static final String ERR_QUERY_FAILED = "Querying Solr failed on ";
     private static final String ERR_FAILED_TO_ENCODE = "Failed to encode query 
string: ";
@@ -216,7 +214,7 @@
                     // set up query
                     final SolrQuery solrQuery = new SolrQuery()
                             .setQuery("list_entry_shingle:\"" + token + "\"")
-                            .setRows(Integer.MAX_VALUE);
+                            .setRows(INITIAL_ROWS_TO_FETCH);
 
                     // when the root logger is set to DEBUG do not limit 
connection times
                     
if(Logger.getRootLogger().getLevel().isGreaterOrEqual(Level.INFO)){
@@ -224,36 +222,54 @@
                         solrQuery.setTimeAllowed(500);
                     }
 
-                    DUMP.info(solrQuery.toString());
+                    // query for hits
+                    QueryResponse response = 
factory.getSolrServer().query(solrQuery);
+                    final int numberOfHits = 
(int)response.getResults().getNumFound();
 
-                    // query
-                    final QueryResponse response = 
factory.getSolrServer().query(solrQuery);
-                    final SolrDocumentList docs = response.getResults();
+                    boolean more = false;
+                    do {
+                        DUMP.info(solrQuery.toString());
 
+                        final SolrDocumentList docs = response.getResults();
 
-                    // iterate through docs
-                    for(SolrDocument doc : docs){
+                        // iterate through docs
+                        for(SolrDocument doc : docs){
 
-                        final String name = (String) 
doc.getFieldValue("list_name");
-                        final String exactname = EXACT_PREFIX + name;
+                            final String name = (String) 
doc.getFieldValue("list_name");
+                            final String exactname = EXACT_PREFIX + name;
 
-                        // remove words made solely of characters that the 
parser considers whitespace
-                        final String hit = ((String) 
doc.getFieldValue("list_entry"))
-                                .replaceAll("\\b" + SKIP_REGEX + "+\\b", " ");
+                            // remove words made solely of characters that the 
parser considers whitespace
+                            final String hit = ((String) 
doc.getFieldValue("list_entry"))
+                                    .replaceAll("\\b" + SKIP_REGEX + "+\\b", " 
");
 
-                        final String synonym = (String) 
doc.getFieldValue("list_entry_synonym");
+                            final String synonym = (String) 
doc.getFieldValue("list_entry_synonym");
 
-                        if(factory.usesListName(name, exactname)){
+                            if(factory.usesListName(name, exactname)){
 
-                            addMatch(name, hit, synonym, query, result);
+                                addMatch(name, hit, synonym, query, result);
 
-                            if (hit.equalsIgnoreCase(query.trim())) {
+                                if (hit.equalsIgnoreCase(query.trim())) {
 
-                                addMatch(exactname, hit, synonym, query, 
result);
+                                    addMatch(exactname, hit, synonym, query, 
result);
+                                }
                             }
                         }
+
+                        int rest = numberOfHits - INITIAL_ROWS_TO_FETCH;
+                        if (!more && rest > 0) {
+                            more = true;
+                            solrQuery.setStart(INITIAL_ROWS_TO_FETCH + 1);
+                            solrQuery.setRows(rest);
+                            // query
+                            response = 
factory.getSolrServer().query(solrQuery);
+                        }
+                        else {
+                            more = false;
+                        }
                     }
+                    while (more);
 
+
                     result = Collections.unmodifiableMap(result);
                     CACHE_QUERY.putInCache(query, result);
                     updatedCache = true;

_______________________________________________
Kernel-commits mailing list
Kernel-commits@sesat.no
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to