Author: cutting
Date: Tue Apr 19 14:36:26 2005
New Revision: 161984
URL: http://svn.apache.org/viewcvs?view=rev&rev=161984
Log:
Make query boosts configurable. Patch by Piotr Kosiorowski.
Modified:
incubator/nutch/trunk/CHANGES.txt
incubator/nutch/trunk/conf/nutch-default.xml
incubator/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
Modified: incubator/nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/CHANGES.txt?view=diff&r1=161983&r2=161984
==============================================================================
--- incubator/nutch/trunk/CHANGES.txt (original)
+++ incubator/nutch/trunk/CHANGES.txt Tue Apr 19 14:36:26 2005
@@ -70,6 +70,9 @@
scoring by link count when generating fetchlists and searching.
(cutting, 20040419)
+15. Make query boosts for host, title, anchor and phrase matches
+ configurable. (Piotr Kosiorowski via cutting, 20050419)
+
Release 0.6
Modified: incubator/nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/conf/nutch-default.xml?view=diff&r1=161983&r2=161984
==============================================================================
--- incubator/nutch/trunk/conf/nutch-default.xml (original)
+++ incubator/nutch/trunk/conf/nutch-default.xml Tue Apr 19 14:36:26 2005
@@ -669,4 +669,43 @@
</description>
</property>
+<!-- query-basic plugin properties -->
+
+<property>
+ <name>query.url.boost</name>
+ <value>4.0</value>
+ <description> Used as a boost for url field in Lucene query.
+ </description>
+</property>
+
+<property>
+ <name>query.anchor.boost</name>
+ <value>2.0</value>
+ <description> Used as a boost for anchor field in Lucene query.
+ </description>
+</property>
+
+
+<property>
+ <name>query.title.boost</name>
+ <value>1.5</value>
+ <description> Used as a boost for title field in Lucene query.
+ </description>
+</property>
+
+<property>
+ <name>query.host.boost</name>
+ <value>2.0</value>
+ <description> Used as a boost for host field in Lucene query.
+ </description>
+</property>
+
+<property>
+ <name>query.phrase.boost</name>
+ <value>1.0</value>
+ <description> Used as a boost for phrase matches in the Lucene query.
+ Multiplied by the boost of the field in which the phrase is matched.
+ </description>
+</property>
+
</nutch-conf>
Modified:
incubator/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java?view=diff&r1=161983&r2=161984
==============================================================================
---
incubator/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
(original)
+++
incubator/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
Tue Apr 19 14:36:26 2005
@@ -26,6 +26,7 @@
import org.apache.nutch.searcher.QueryFilter;
import org.apache.nutch.searcher.Query;
import org.apache.nutch.searcher.Query.*;
+import org.apache.nutch.util.NutchConf;
import java.io.IOException;
import java.util.HashSet;
@@ -33,14 +34,23 @@
/** The default query filter. Query terms in the default query field are
* expanded to search the url, anchor and content document fields.*/
public class BasicQueryFilter implements QueryFilter {
+
+ private static float URL_BOOST =
+ NutchConf.get().getFloat("query.url.boost", 4.0f);
- private static float URL_BOOST = 4.0f;
- private static float ANCHOR_BOOST = 2.0f;
- private static float TITLE_BOOST = 1.5f;
- private static float HOST_BOOST = 2.0f;
+ private static float ANCHOR_BOOST =
+ NutchConf.get().getFloat("query.anchor.boost", 2.0f);
+
+ private static float TITLE_BOOST =
+ NutchConf.get().getFloat("query.title.boost", 1.5f);
+
+ private static float HOST_BOOST =
+ NutchConf.get().getFloat("query.host.boost", 2.0f);
private static int SLOP = Integer.MAX_VALUE;
- private static float PHRASE_BOOST = 1.0f;
+
+ private static float PHRASE_BOOST =
+ NutchConf.get().getFloat("query.phrase.boost", 1.0f);
private static final String[] FIELDS =
{ "url", "anchor", "content", "title", "host" };
-------------------------------------------------------
This SF.Net email is sponsored by: New Crystal Reports XI.
Version 11 adds new functionality designed to reduce time involved in
creating, integrating, and deploying reporting solutions. Free runtime info,
new features, or free trial, at: http://www.businessobjects.com/devxi/728
_______________________________________________
Nutch-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs