Author: cutting
Date: Wed Mar 9 10:58:26 2005
New Revision: 156669
URL: http://svn.apache.org/viewcvs?view=rev&rev=156669
Log:
Added IndexWriter.setTermIndexInterval().
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
Modified: lucene/java/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Mar 9 10:58:26 2005
@@ -78,6 +78,10 @@
compound index file.
(adapted from code contributed by Garrett Rooney; committed by Bernhard)
+13. Add IndexWriter.setTermIndexInterval() method. See javadocs.
+ (Doug Cutting)
+
+
API Changes
1. Several methods and fields have been deprecated. The API documentation
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java Wed Mar 9 10:58:26 2005
@@ -39,10 +39,11 @@
private Similarity similarity;
private FieldInfos fieldInfos;
private int maxFieldLength;
+ private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private PrintStream infoStream;
- /**
- *
+ /** This ctor used by test code only.
+ *
* @param directory The directory to write the document information to
* @param analyzer The analyzer to use for the document
* @param similarity The Similarity function
@@ -56,6 +57,14 @@
this.maxFieldLength = maxFieldLength;
}
+ DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
+ this.directory = directory;
+ this.analyzer = analyzer;
+ this.similarity = writer.getSimilarity();
+ this.maxFieldLength = writer.getMaxFieldLength();
+ this.termIndexInterval = writer.getTermIndexInterval();
+ }
+
final void addDocument(String segment, Document doc)
throws IOException {
// write field names
@@ -295,7 +304,8 @@
//open files for inverse index storage
freq = directory.createOutput(segment + ".frq");
prox = directory.createOutput(segment + ".prx");
- tis = new TermInfosWriter(directory, segment, fieldInfos);
+ tis = new TermInfosWriter(directory, segment, fieldInfos,
+ termIndexInterval);
TermInfo ti = new TermInfo();
String currentField = null;
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Wed Mar 9 10:58:26 2005
@@ -103,6 +103,16 @@
"10000"));
+ /** The default value for {@link #getTermIndexInterval()}. This is
+ * determined by the <code>org.apache.lucene.termIndexInterval</code> system
+ * property. The default is 128.
+ */
+ public static final int DEFAULT_TERM_INDEX_INTERVAL =
+ Integer.parseInt(System.getProperty("org.apache.lucene.termIndexInterval",
+ "128"));
+
+
+
private Directory directory; // where this index resides
private Analyzer analyzer; // how to analyze text
@@ -113,6 +123,8 @@
private Lock writeLock;
+ private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+
/** Use compound file setting. Defaults to true, minimizing the number of
* files used. Setting this to false may improve indexing performance, but
* may also cause file handle problems.
@@ -154,6 +166,26 @@
return this.similarity;
}
+ /** Expert: Set the interval between indexed terms. Large values cause less
+ * memory to be used by IndexReader, but slow random-access to terms. Small
+ * values cause more memory to be used by an IndexReader, and speed
+ * random-access to terms. In particular,
+ * <code>numUniqueTerms/interval</code> terms are read into memory by an
+ * IndexReader, and, on average, <code>interval/2</code> terms must be
+ * scanned for each random term access.
+ *
+ * @see #DEFAULT_TERM_INDEX_INTERVAL
+ */
+ public void setTermIndexInterval(int interval) {
+ this.termIndexInterval = interval;
+ }
+
+ /** Expert: Return the interval between indexed terms.
+ *
+ * @see #setTermIndexInterval(int)
+ */
+ public int getTermIndexInterval() { return termIndexInterval; }
+
/**
* Constructs an IndexWriter for the index in <code>path</code>.
* Text will be analyzed with <code>a</code>. If <code>create</code>
@@ -359,6 +391,11 @@
}
}
+ /** Returns the Directory used by this index. */
+ public Directory getDirectory() {
+ return directory;
+ }
+
/** Returns the analyzer used by this index. */
public Analyzer getAnalyzer() {
return analyzer;
@@ -408,7 +445,7 @@
*/
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
DocumentWriter dw =
- new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
+ new DocumentWriter(ramDirectory, analyzer, this);
dw.setInfoStream(infoStream);
String segmentName = newSegmentName();
dw.addDocument(segmentName, doc);
@@ -514,7 +551,7 @@
optimize(); // start with zero or 1 seg
final String mergedName = newSegmentName();
- SegmentMerger merger = new SegmentMerger(directory, mergedName);
+ SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
IndexReader sReader = null;
@@ -609,7 +646,7 @@
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger =
- new SegmentMerger(directory, mergedName);
+ new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < segmentInfos.size(); i++) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Wed Mar 9 10:58:26 2005
@@ -39,6 +39,7 @@
final class SegmentMerger {
private Directory directory;
private String segment;
+ private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private Vector readers = new Vector();
private FieldInfos fieldInfos;
@@ -51,7 +52,7 @@
"tvx", "tvd", "tvf"
};
- /**
+ /** This ctor used only by test code.
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
@@ -61,6 +62,12 @@
segment = name;
}
+ SegmentMerger(IndexWriter writer, String name) {
+ directory = writer.getDirectory();
+ segment = name;
+ termIndexInterval = writer.getTermIndexInterval();
+ }
+
/**
* Add an IndexReader to the collection of readers that are to be merged
* @param reader
@@ -220,7 +227,8 @@
freqOutput = directory.createOutput(segment + ".frq");
proxOutput = directory.createOutput(segment + ".prx");
termInfosWriter =
- new TermInfosWriter(directory, segment, fieldInfos);
+ new TermInfosWriter(directory, segment, fieldInfos,
+ termIndexInterval);
skipInterval = termInfosWriter.skipInterval;
queue = new SegmentMergeQueue(readers.size());
Modified:
lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java Wed Mar 9 10:58:26 2005
@@ -61,20 +61,22 @@
private TermInfosWriter other = null;
- TermInfosWriter(Directory directory, String segment, FieldInfos fis)
+ TermInfosWriter(Directory directory, String segment, FieldInfos fis,
+ int interval)
throws IOException {
- initialize(directory, segment, fis, false);
- other = new TermInfosWriter(directory, segment, fis, true);
+ initialize(directory, segment, fis, interval, false);
+ other = new TermInfosWriter(directory, segment, fis, interval, true);
other.other = this;
}
private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
- boolean isIndex) throws IOException {
- initialize(directory, segment, fis, isIndex);
+ int interval, boolean isIndex) throws IOException {
+ initialize(directory, segment, fis, interval, isIndex);
}
private void initialize(Directory directory, String segment, FieldInfos fis,
- boolean isi) throws IOException {
+ int interval, boolean isi) throws IOException {
+ indexInterval = interval;
fieldInfos = fis;
isIndex = isi;
output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis"));