nutch-default.xml

jnioche Wed, 05 Jan 2011 08:18:31 -0800

Author: jnioche
Date: Wed Jan  5 16:17:37 2011
New Revision: 1055503

URL: http://svn.apache.org/viewvc?rev=1055503&view=rev
Log:
ported NUTCH-883 to 1.3 (see NUTCH-951)


Modified:
    nutch/branches/branch-1.3/CHANGES.txt
    nutch/branches/branch-1.3/conf/nutch-default.xml

Modified: nutch/branches/branch-1.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/CHANGES.txt?rev=1055503&r1=1055502&r2=1055503&view=diff
==============================================================================
--- nutch/branches/branch-1.3/CHANGES.txt (original)
+++ nutch/branches/branch-1.3/CHANGES.txt Wed Jan  5 16:17:37 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.3 - Current Development
 
+* NUTCH-883 Remove unused parameters from nutch-default.xml (jnioche) 
+
 * NUTCH-936 LanguageIdentifier should not set empty lang field on NutchDocument (Markus Jelsma via jnioche)
 
 * NUTCH-855 ScoringFilter and IndexingFilter: To allow for the propagation of URL Metatags and their subsequent indexing (Scott Gonyea via mattmann)

Modified: nutch/branches/branch-1.3/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/conf/nutch-default.xml?rev=1055503&r1=1055502&r2=1055503&view=diff
==============================================================================
--- nutch/branches/branch-1.3/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.3/conf/nutch-default.xml Wed Jan  5 16:17:37 2011
@@ -682,86 +682,6 @@
   </description>
 </property>
 
-<!-- indexer properties -->
-
-<property>
-  <name>indexer.score.power</name>
-  <value>0.5</value>
-  <description>Determines the power of link analyis scores.  Each
-  pages's boost is set to <i>score<sup>scorePower</sup></i> where
-  <i>score</i> is its link analysis score and <i>scorePower</i> is the
-  value of this parameter.  This is compiled into indexes, so, when
-  this is changed, pages must be re-indexed for it to take
-  effect.</description>
-</property>
-
-<property>
-  <name>indexer.max.title.length</name>
-  <value>100</value>
-  <description>The maximum number of characters of a title that are indexed.
-  </description>
-</property>
-
-<property>
-  <name>indexer.max.tokens</name>
-  <value>10000</value>
-  <description>
-  The maximum number of tokens that will be indexed for a single field
-  in a document. This limits the amount of memory required for
-  indexing, so that collections with very large files will not crash
-  the indexing process by running out of memory.
-
-  Note that this effectively truncates large documents, excluding
-  from the index tokens that occur further in the document. If you
-  know your source documents are large, be sure to set this value
-  high enough to accomodate the expected size. If you set it to
-  -1, then the only limit is your memory, but you should anticipate
-  an OutOfMemoryError.
-  </description>
-</property>
-
-<property>
-  <name>indexer.mergeFactor</name>
-  <value>50</value>
-  <description>The factor that determines the frequency of Lucene segment
-  merges. This must not be less than 2, higher values increase indexing
-  speed but lead to increased RAM usage, and increase the number of
-  open file handles (which may lead to "Too many open files" errors).
-  NOTE: the "segments" here have nothing to do with Nutch segments, they
-  are a low-level data unit used by Lucene.
-  </description>
-</property>
-
-<property>
-  <name>indexer.minMergeDocs</name>
-  <value>50</value>
-  <description>This number determines the minimum number of Lucene
-  Documents buffered in memory between Lucene segment merges. Larger
-  values increase indexing speed and increase RAM usage.
-  </description>
-</property>
-
-<property>
-  <name>indexer.maxMergeDocs</name>
-  <value>2147483647</value>
-  <description>This number determines the maximum number of Lucene
-  Documents to be merged into a new Lucene segment. Larger values
-  increase batch indexing speed and reduce the number of Lucene segments,
-  which reduces the number of open file handles; however, this also
-  decreases incremental indexing performance.
-  </description>
-</property>
-
-<property>
-  <name>indexer.termIndexInterval</name>
-  <value>128</value>
-  <description>Determines the fraction of terms which Lucene keeps in
-  RAM when searching, to facilitate random-access.  Smaller values use
-  more memory but make searches somewhat faster.  Larger values use
-  less memory but make searches somewhat slower.
-  </description>
-</property>
-
 <!-- moreindexingfilter plugin properties -->
 
 <property>
@@ -791,6 +711,24 @@
   </description>
 </property>
 
+<property>
+  <name>indexer.score.power</name>
+  <value>0.5</value>
+  <description>Determines the power of link analyis scores.  Each
+  pages's boost is set to <i>score<sup>scorePower</sup></i> where
+  <i>score</i> is its link analysis score and <i>scorePower</i> is the
+  value of this parameter.  This is compiled into indexes, so, when
+  this is changed, pages must be re-indexed for it to take
+  effect.</description>
+</property>
+
+<property>
+  <name>indexer.max.title.length</name>
+  <value>100</value>
+  <description>The maximum number of characters of a title that are indexed.
+  </description>
+</property>
+
 <!-- URL normalizer properties -->
 
 <property>

svn commit: r1055503 - in /nutch/branches/branch-1.3: CHANGES.txt conf/nutch-default.xml

Reply via email to