cutting 02/01/21 09:07:23
Modified: src/java/org/apache/lucene/index IndexReader.java
SegmentReader.java SegmentTermDocs.java
SegmentTermPositions.java SegmentsReader.java
TermDocs.java TermEnum.java
Log:
Substantially improved the performance of DateFilter by adding the
ability to reuse TermDocs objects.
Revision Changes Path
1.6 +18 -4 jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
Index: IndexReader.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- IndexReader.java 26 Dec 2001 17:23:05 -0000 1.5
+++ IndexReader.java 21 Jan 2002 17:07:23 -0000 1.6
@@ -193,7 +193,7 @@
abstract public int docFreq(Term t) throws IOException;
/** Returns an enumeration of all the documents which contain
- <code>Term</code>. For each document, the document number, the frequency of
+ <code>term</code>. For each document, in addition to the document number, the frequency of
the term in that document is also provided, for use in search scoring.
Thus, this method implements the mapping:
<p><ul>
@@ -201,10 +201,17 @@
</ul>
<p>The enumeration is ordered by document number. Each document number
is greater than all that precede it in the enumeration. */
- abstract public TermDocs termDocs(Term t) throws IOException;
+ public TermDocs termDocs(Term term) throws IOException {
+ TermDocs termDocs = termDocs();
+ termDocs.seek(term);
+ return termDocs;
+ }
+
+ /** Returns an unpositioned {@link TermDocs} enumerator. */
+ abstract public TermDocs termDocs() throws IOException;
/** Returns an enumeration of all the documents which contain
- <code>Term</code>. For each document, in addition to the document number
+ <code>term</code>. For each document, in addition to the document number
and frequency of the term in that document, a list of all of the ordinal
positions of the term in the document is available. Thus, this method
implements the mapping:
@@ -218,7 +225,14 @@
<p> This positional information facilitates phrase and proximity searching.
<p>The enumeration is ordered by document number. Each document number is
greater than all that precede it in the enumeration. */
- abstract public TermPositions termPositions(Term t) throws IOException;
+ public TermPositions termPositions(Term term) throws IOException {
+ TermPositions termPositions = termPositions();
+ termPositions.seek(term);
+ return termPositions;
+ }
+
+ /** Returns an unpositioned {@link TermPositions} enumerator. */
+ abstract public TermPositions termPositions() throws IOException;
/** Deletes the document numbered <code>docNum</code>. Once a document is
deleted it will not appear in TermDocs or TermPositions enumerations.
1.3 +6 -22
jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
Index: SegmentReader.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- SegmentReader.java 27 Sep 2001 16:27:01 -0000 1.2
+++ SegmentReader.java 21 Jan 2002 17:07:23 -0000 1.3
@@ -78,8 +78,8 @@
BitVector deletedDocs = null;
private boolean deletedDocsDirty = false;
- private InputStream freqStream;
- private InputStream proxStream;
+ InputStream freqStream;
+ InputStream proxStream;
private static class Norm {
@@ -194,28 +194,12 @@
return (deletedDocs != null && deletedDocs.get(n));
}
- public final TermDocs termDocs(Term t) throws IOException {
- TermInfo ti = tis.get(t);
- if (ti != null)
- return new SegmentTermDocs(this, ti);
- else
- return null;
+ public final TermDocs termDocs() throws IOException {
+ return new SegmentTermDocs(this);
}
- final InputStream getFreqStream () {
- return (InputStream)freqStream.clone();
- }
-
- public final TermPositions termPositions(Term t) throws IOException {
- TermInfo ti = tis.get(t);
- if (ti != null)
- return new SegmentTermPositions(this, ti);
- else
- return null;
- }
-
- final InputStream getProxStream () {
- return (InputStream)proxStream.clone();
+ public final TermPositions termPositions() throws IOException {
+ return new SegmentTermPositions(this);
}
public final int docFreq(Term t) throws IOException {
1.2 +15 -10
jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
Index: SegmentTermDocs.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SegmentTermDocs.java 18 Sep 2001 16:29:54 -0000 1.1
+++ SegmentTermDocs.java 21 Jan 2002 17:07:23 -0000 1.2
@@ -66,21 +66,26 @@
int doc = 0;
int freq;
- SegmentTermDocs(SegmentReader p) throws IOException {
- parent = p;
- freqStream = parent.getFreqStream();
- deletedDocs = parent.deletedDocs;
+ SegmentTermDocs(SegmentReader parent)
+ throws IOException {
+ this.parent = parent;
+ this.freqStream = (InputStream)parent.freqStream.clone();
+ this.deletedDocs = parent.deletedDocs;
}
-
- SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
- this(p);
+
+ public void seek(Term term) throws IOException {
+ TermInfo ti = parent.tis.get(term);
seek(ti);
}
void seek(TermInfo ti) throws IOException {
- freqCount = ti.docFreq;
- doc = 0;
- freqStream.seek(ti.freqPointer);
+ if (ti == null) {
+ freqCount = 0;
+ } else {
+ freqCount = ti.docFreq;
+ doc = 0;
+ freqStream.seek(ti.freqPointer);
+ }
}
public void close() throws IOException {
1.2 +1 -7
jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java
Index: SegmentTermPositions.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SegmentTermPositions.java 18 Sep 2001 16:29:54 -0000 1.1
+++ SegmentTermPositions.java 21 Jan 2002 17:07:23 -0000 1.2
@@ -66,13 +66,7 @@
SegmentTermPositions(SegmentReader p) throws IOException {
super(p);
- proxStream = parent.getProxStream();
- }
-
- SegmentTermPositions(SegmentReader p, TermInfo ti)
- throws IOException {
- this(p);
- seek(ti);
+ this.proxStream = (InputStream)parent.proxStream.clone();
}
final void seek(TermInfo ti) throws IOException {
1.3 +38 -20
jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java
Index: SegmentsReader.java
===================================================================
RCS file:
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- SegmentsReader.java 11 Oct 2001 22:44:23 -0000 1.2
+++ SegmentsReader.java 21 Jan 2002 17:07:23 -0000 1.3
@@ -151,12 +151,12 @@
return total;
}
- public final TermDocs termDocs(Term term) throws IOException {
- return new SegmentsTermDocs(readers, starts, term);
+ public final TermDocs termDocs() throws IOException {
+ return new SegmentsTermDocs(readers, starts);
}
- public final TermPositions termPositions(Term term) throws IOException {
- return new SegmentsTermPositions(readers, starts, term);
+ public final TermPositions termPositions() throws IOException {
+ return new SegmentsTermPositions(readers, starts);
}
public final void close() throws IOException {
@@ -240,14 +240,16 @@
protected int base = 0;
protected int pointer = 0;
- SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
+ private SegmentTermDocs[] segTermDocs;
+ protected SegmentTermDocs current; // == segTermDocs[pointer]
+
+ SegmentsTermDocs(SegmentReader[] r, int[] s) {
readers = r;
starts = s;
- term = t;
+
+ segTermDocs = new SegmentTermDocs[r.length];
}
- protected SegmentTermDocs current;
-
public final int doc() {
return base + current.doc;
}
@@ -255,14 +257,19 @@
return current.freq;
}
+ public final void seek(Term term) {
+ this.term = term;
+ this.base = 0;
+ this.pointer = 0;
+ this.current = null;
+ }
+
public final boolean next() throws IOException {
if (current != null && current.next()) {
return true;
} else if (pointer < readers.length) {
- if (current != null)
- current.close();
base = starts[pointer];
- current = termDocs(readers[pointer++]);
+ current = termDocs(pointer++);
return next();
} else
return false;
@@ -275,14 +282,13 @@
while (current == null) {
if (pointer < readers.length) { // try next segment
base = starts[pointer];
- current = termDocs(readers[pointer++]);
+ current = termDocs(pointer++);
} else {
return 0;
}
}
int end = current.read(docs, freqs);
if (end == 0) { // none left in segment
- current.close();
current = null;
} else { // got some
final int b = base; // adjust doc numbers
@@ -302,25 +308,37 @@
return true;
}
+ private SegmentTermDocs termDocs(int i) throws IOException {
+ if (term == null)
+ return null;
+ SegmentTermDocs result = segTermDocs[i];
+ if (result == null)
+ result = segTermDocs[i] = termDocs(readers[i]);
+ result.seek(term);
+ return result;
+ }
+
protected SegmentTermDocs termDocs(SegmentReader reader)
- throws IOException {
- return (SegmentTermDocs)reader.termDocs(term);
+ throws IOException {
+ return (SegmentTermDocs)reader.termDocs();
}
public final void close() throws IOException {
- if (current != null)
- current.close();
+ for (int i = 0; i < segTermDocs.length; i++) {
+ if (segTermDocs[i] != null)
+ segTermDocs[i].close();
+ }
}
}
class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
- SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
- super(r,s,t);
+ SegmentsTermPositions(SegmentReader[] r, int[] s) {
+ super(r,s);
}
protected final SegmentTermDocs termDocs(SegmentReader reader)
throws IOException {
- return (SegmentTermDocs)reader.termPositions(term);
+ return (SegmentTermDocs)reader.termPositions();
}
public final int nextPosition() throws IOException {
1.3 +6 -1 jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java
Index: TermDocs.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- TermDocs.java 25 Dec 2001 19:27:04 -0000 1.2
+++ TermDocs.java 21 Jan 2002 17:07:23 -0000 1.3
@@ -67,6 +67,11 @@
*/
public interface TermDocs {
+ /** Sets this to the data for a term.
+ * The enumeration is reset to the start of the data for this term.
+ */
+ void seek(Term term) throws IOException;
+
/** Returns the current document number. <p> This is invalid until {@link
#next()} is called for the first time.*/
int doc();
@@ -91,7 +96,7 @@
/** Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
* an entry. <p>Behaves as if written: <pre>
- * public boolean skipTo(int target) {
+ * boolean skipTo(int target) {
* do {
* if (!next())
* return false;
1.2 +2 -4 jakarta-lucene/src/java/org/apache/lucene/index/TermEnum.java
Index: TermEnum.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermEnum.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TermEnum.java 18 Sep 2001 16:29:55 -0000 1.1
+++ TermEnum.java 21 Jan 2002 17:07:23 -0000 1.2
@@ -65,12 +65,10 @@
/** Increments the enumeration to the next element. True if one exists.*/
abstract public boolean next() throws IOException;
- /** Returns the current Term in the enumeration.
- Initially invalid, valid after next() called for the first time.*/
+ /** Returns the current Term in the enumeration.*/
abstract public Term term();
- /** Returns the docFreq of the current Term in the enumeration.
- Initially invalid, valid after next() called for the first time.*/
+ /** Returns the docFreq of the current Term in the enumeration.*/
abstract public int docFreq();
/** Closes the enumeration to further activity, freeing resources. */
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>