Author: gsingers
Date: Tue Jun 17 05:48:57 2008
New Revision: 668638
URL: http://svn.apache.org/viewvc?rev=668638&view=rev
Log:
SOLR-595: Added support for Field level boosting in MoreLikeThis
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
lucene/solr/trunk/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
lucene/solr/trunk/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=668638&r1=668637&r2=668638&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Jun 17 05:48:57 2008
@@ -288,6 +288,8 @@
51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and
from SolrDocuments. (Noble Paul via ryan)
+
+52. SOLR-595: Add support for Field level boosting in the MoreLikeThis
Handler. (Tom Morton, gsingers)
Changes in runtime behavior
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
Modified:
lucene/solr/trunk/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/MoreLikeThisParams.java?rev=668638&r1=668637&r2=668638&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
Tue Jun 17 05:48:57 2008
@@ -35,6 +35,7 @@
public final static String MAX_QUERY_TERMS = PREFIX + "maxqt";
public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp";
public final static String BOOST = PREFIX + "boost"; // boost or not?
+ public final static String QF = PREFIX + "qf"; //boosting applied to mlt fields
// the /mlt request handler uses 'rows'
public final static String DOC_COUNT = PREFIX + "count";
Modified:
lucene/solr/trunk/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/MoreLikeThisHandler.java?rev=668638&r1=668637&r2=668638&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
Tue Jun 17 05:48:57 2008
@@ -23,8 +23,11 @@
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
@@ -37,6 +40,7 @@
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.SolrParams;
@@ -51,7 +55,6 @@
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryParsing;
@@ -131,7 +134,6 @@
// Matching options
boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE,
true );
int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 );
-
// Find the base match
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF),
params, req.getSchema());
DocList match = searcher.getDocList(query, null, null, matchOffset, 1,
flags ); // only get the first one...
@@ -231,6 +233,7 @@
final IndexReader reader;
final SchemaField uniqueKeyField;
final boolean needDocSet;
+ Map<String,Float> boostFields;
Query mltquery; // expose this for debugging
@@ -260,12 +263,27 @@
mlt.setMaxQueryTerms(
params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS,
MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED,
MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false
) );
+ boostFields =
SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
+ }
+
+ private void setBoosts(Query mltquery) { // Multiplies the boost of each term clause in mltquery by the boost configured for that term's field in boostFields.
+ if (boostFields.size() > 0) { // skip the walk when no field boosts were parsed from MoreLikeThisParams.QF
+ List clauses = ((BooleanQuery)mltquery).clauses(); // unchecked cast: assumes the MLT query is a BooleanQuery -- TODO confirm for every mlt.like(...) result
+ for( Object o : clauses ) {
+ TermQuery q = (TermQuery)((BooleanClause)o).getQuery(); // unchecked cast: assumes every clause wraps a TermQuery; anything else throws ClassCastException
+ Float b = this.boostFields.get(q.getTerm().field()); // looked up by the term's field name; null when that field has no entry
+ if (b != null) {
+ q.setBoost(b*q.getBoost()); // scales the clause's existing boost instead of overwriting it
+ }
+ }
+ }
}
public DocListAndSet getMoreLikeThis( int id, int start, int rows,
List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
Document doc = reader.document(id);
mltquery = mlt.like(id);
+ setBoosts(mltquery);
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
}
@@ -289,6 +307,7 @@
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows,
List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
mltquery = mlt.like(reader);
+ setBoosts(mltquery);
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
}
Modified:
lucene/solr/trunk/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java?rev=668638&r1=668637&r2=668638&view=diff
==============================================================================
---
lucene/solr/trunk/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
(original)
+++
lucene/solr/trunk/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java
Tue Jun 17 05:48:57 2008
@@ -21,11 +21,15 @@
import java.util.HashMap;
import java.util.Map;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.AbstractSolrTestCase;
@@ -43,6 +47,7 @@
lrf = h.getRequestFactory("standard", 0, 20 );
}
+
public void testInterface()
{
SolrCore core = h.getCore();
@@ -67,5 +72,35 @@
mlt.handleRequestBody( req, new SolrQueryResponse() );
}
catch( Exception ex ) {} // expected
+
+ assertU(adoc(new String[]{"id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away"}));
+ assertU(adoc(new String[]{"id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away"}));
+ assertU(adoc(new String[]{"id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"}));
+ assertU(adoc(new String[]{"id","45","name","George Harrison","subword","Yellow Submarine","subword","Help","subword","Magical Mystery Tour","subword","Sgt. Peppers Lonley Hearts Club Band"}));
+ assertU(adoc(new String[]{"id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"}));
+ assertU(commit());
+
+ params.put(CommonParams.Q, new String[]{"id:42"});
+ params.put(MoreLikeThisParams.MLT, new String[]{"true"});
+ params.put(MoreLikeThisParams.SIMILARITY_FIELDS, new
String[]{"name,subword"});
+ params.put(MoreLikeThisParams.INTERESTING_TERMS,new String[]{"details"});
+ params.put(MoreLikeThisParams.MIN_TERM_FREQ,new String[]{"1"});
+ params.put(MoreLikeThisParams.MIN_DOC_FREQ,new String[]{"1"});
+
+ SolrQueryRequest mltreq = new LocalSolrQueryRequest( core,
(SolrParams)mmparams);
+ assertQ("morelikethis - tom cruise",mltreq
+ ,"//result/doc[1]/int[@name='id'][.='46']"
+ ,"//result/doc[2]/int[@name='id'][.='43']");
+
+ params.put(CommonParams.Q, new String[]{"id:44"});
+ assertQ("morelike this - harrison ford",mltreq
+ ,"//result/doc[1]/int[@name='id'][.='45']");
+
+ params.put(CommonParams.Q, new String[]{"id:42"});
+ params.put(MoreLikeThisParams.QF,new String[]{"name^5.0 subword^0.1"});
+ assertQ("morelikethis with weights",mltreq
+ ,"//result/doc[1]/int[@name='id'][.='43']"
+ ,"//result/doc[2]/int[@name='id'][.='46']");
+
}
}