Author: cutting
Date: Mon May 9 20:20:11 2005
New Revision: 169406
URL: http://svn.apache.org/viewcvs?rev=169406&view=rev
Log:
Add ability to set Lucene's term index interval from config.
Modified:
incubator/nutch/trunk/conf/nutch-default.xml
incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
Modified: incubator/nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/conf/nutch-default.xml?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/conf/nutch-default.xml (original)
+++ incubator/nutch/trunk/conf/nutch-default.xml Mon May 9 20:20:11 2005
@@ -444,6 +444,17 @@
</description>
</property>
+<property>
+ <name>indexer.termIndexInterval</name>
+ <value>128</value>
+ <description>Sets the interval between the terms which Lucene keeps in
+ RAM when searching, to facilitate random-access (one term in every
+ N is kept). Smaller values use more memory but make searches somewhat
+ faster. Larger values use less memory but make searches somewhat slower.
+ </description>
+</property>
+
+
<!-- analysis properties -->
<property>
Modified:
incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
(original)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
Mon May 9 20:20:11 2005
@@ -47,6 +47,9 @@
IndexWriter.DEFAULT_MIN_MERGE_DOCS);
private int MAX_MERGE_DOCS = NutchConf.get().getInt("indexer.maxMergeDocs",
IndexWriter.DEFAULT_MAX_MERGE_DOCS);
+ private int TERM_INDEX_INTERVAL =
+ NutchConf.get().getInt("indexer.termIndexInterval",
+ IndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
private NutchFileSystem nfs;
private File outputIndex;
private File localWorkingDir;
@@ -90,6 +93,7 @@
writer.mergeFactor = MERGE_FACTOR;
writer.minMergeDocs = MIN_MERGE_DOCS;
writer.maxMergeDocs = MAX_MERGE_DOCS;
+ writer.setTermIndexInterval(TERM_INDEX_INTERVAL);
writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
writer.setUseCompoundFile(false);
writer.setSimilarity(new NutchSimilarity());
Modified:
incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
URL:
http://svn.apache.org/viewcvs/incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java?rev=169406&r1=169405&r2=169406&view=diff
==============================================================================
--- incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
(original)
+++ incubator/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
Mon May 9 20:20:11 2005
@@ -55,6 +55,9 @@
IndexWriter.DEFAULT_MIN_MERGE_DOCS);
private int MAX_MERGE_DOCS = NutchConf.get().getInt("indexer.maxMergeDocs",
IndexWriter.DEFAULT_MAX_MERGE_DOCS);
+ private int TERM_INDEX_INTERVAL =
+ NutchConf.get().getInt("indexer.termIndexInterval",
+ IndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
private NutchFileSystem nfs;
private long maxDocs = Long.MAX_VALUE;
private File srcDir;
@@ -99,6 +102,7 @@
writer.mergeFactor = MERGE_FACTOR;
writer.minMergeDocs = MIN_MERGE_DOCS;
writer.maxMergeDocs = MAX_MERGE_DOCS;
+ writer.setTermIndexInterval(TERM_INDEX_INTERVAL);
writer.maxFieldLength = maxFieldLength;
//writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
writer.setUseCompoundFile(false);
-------------------------------------------------------
This SF.Net email is sponsored by Oracle Space Sweepstakes
Want to be the first software developer in space?
Enter now for the Oracle Space Sweepstakes!
http://ads.osdn.com/?ad_id=7393&alloc_id=16281&op=click
_______________________________________________
Nutch-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs