Author: cutting
Date: Mon May  9 20:20:11 2005
New Revision: 169406

URL: http://svn.apache.org/viewcvs?rev=169406&view=rev
Log:
Add ability to set Lucene's term index interval from config.

Modified:
    incubator/nutch/trunk/conf/nutch-default.xml
    incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
    incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java

Modified: incubator/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/conf/nutch-default.xml?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/conf/nutch-default.xml (original)
+++ incubator/nutch/trunk/conf/nutch-default.xml Mon May  9 20:20:11 2005
@@ -444,6 +444,17 @@
   </description>
 </property>
 
+<property>
+  <name>indexer.termIndexInterval</name>
+  <value>128</value>
+  <description>Determines the fraction of terms which Lucene keeps in
+  RAM when searching, to facilitate random-access.  Smaller values use
+  more memory but make searches somewhat faster.  Larger values use
+  less memory but make searches somewhat slower.
+  </description>
+</property>
+
+
 <!-- analysis properties -->
 
 <property>

Modified: incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Mon May  9 20:20:11 2005
@@ -47,6 +47,9 @@
       IndexWriter.DEFAULT_MIN_MERGE_DOCS);
   private int MAX_MERGE_DOCS = NutchConf.get().getInt("indexer.maxMergeDocs",
       IndexWriter.DEFAULT_MAX_MERGE_DOCS);
+  private int TERM_INDEX_INTERVAL =
+    NutchConf.get().getInt("indexer.termIndexInterval",
+                           IndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
   private NutchFileSystem nfs;
   private File outputIndex;
   private File localWorkingDir;
@@ -90,6 +93,7 @@
     writer.mergeFactor = MERGE_FACTOR;
     writer.minMergeDocs = MIN_MERGE_DOCS;
     writer.maxMergeDocs = MAX_MERGE_DOCS;
+    writer.setTermIndexInterval(TERM_INDEX_INTERVAL);
     writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
     writer.setUseCompoundFile(false);
     writer.setSimilarity(new NutchSimilarity());

Modified: incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
URL: http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java (original)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java Mon May  9 20:20:11 2005
@@ -55,6 +55,9 @@
       IndexWriter.DEFAULT_MIN_MERGE_DOCS);
   private int MAX_MERGE_DOCS = NutchConf.get().getInt("indexer.maxMergeDocs",
       IndexWriter.DEFAULT_MAX_MERGE_DOCS);
+  private int TERM_INDEX_INTERVAL =
+    NutchConf.get().getInt("indexer.termIndexInterval",
+                           IndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
   private NutchFileSystem nfs;
   private long maxDocs = Long.MAX_VALUE;
   private File srcDir;
@@ -99,6 +102,7 @@
       writer.mergeFactor = MERGE_FACTOR;
       writer.minMergeDocs = MIN_MERGE_DOCS;
       writer.maxMergeDocs = MAX_MERGE_DOCS;
+      writer.setTermIndexInterval(TERM_INDEX_INTERVAL);
       writer.maxFieldLength = maxFieldLength;
       //writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
       writer.setUseCompoundFile(false);




-------------------------------------------------------
This SF.Net email is sponsored by Oracle Space Sweepstakes
Want to be the first software developer in space?
Enter now for the Oracle Space Sweepstakes!
http://ads.osdn.com/?ad_ids93&alloc_id281&op=click
_______________________________________________
Nutch-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to