Attached is a patch which delays reading of the index terms until they are first accessed. The cost of this is one additional open file descriptor, which is held until the terms are first accessed and is then closed. The benefit is that operations that do not require access to the index terms are much faster and use much less memory.

Thoughts?

Doug
Index: src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosReader.java	(revision 155349)
+++ src/java/org/apache/lucene/index/TermInfosReader.java	(working copy)
@@ -33,6 +33,12 @@
   private SegmentTermEnum origEnum;
   private long size;
 
+  private Term[] indexTerms = null;
+  private TermInfo[] indexInfos;
+  private long[] indexPointers;
+  
+  private SegmentTermEnum indexEnum;
+
   TermInfosReader(Directory dir, String seg, FieldInfos fis)
        throws IOException {
     directory = dir;
@@ -42,7 +48,10 @@
     origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
                                    fieldInfos, false);
     size = origEnum.size;
-    readIndex();
+
+    indexEnum =
+      new SegmentTermEnum(directory.openInput(segment + ".tii"),
+			  fieldInfos, true);
   }
 
   protected void finalize() {
@@ -73,28 +82,23 @@
     return termEnum;
   }
 
-  Term[] indexTerms = null;
-  TermInfo[] indexInfos;
-  long[] indexPointers;
-
-  private final void readIndex() throws IOException {
-    SegmentTermEnum indexEnum =
-      new SegmentTermEnum(directory.openInput(segment + ".tii"),
-			  fieldInfos, true);
+  private final void ensureIndexIsRead() throws IOException {
+    if (indexTerms != null)
+      return;
     try {
       int indexSize = (int)indexEnum.size;
 
       indexTerms = new Term[indexSize];
       indexInfos = new TermInfo[indexSize];
       indexPointers = new long[indexSize];
-
+        
       for (int i = 0; indexEnum.next(); i++) {
-	indexTerms[i] = indexEnum.term();
-	indexInfos[i] = indexEnum.termInfo();
-	indexPointers[i] = indexEnum.indexPointer;
+        indexTerms[i] = indexEnum.term();
+        indexInfos[i] = indexEnum.termInfo();
+        indexPointers[i] = indexEnum.indexPointer;
       }
     } finally {
-      indexEnum.close();
+        indexEnum.close();
     }
   }
 
@@ -126,6 +130,8 @@
   TermInfo get(Term term) throws IOException {
     if (size == 0) return null;
 
+    ensureIndexIsRead();
+
     // optimize sequential access: first try scanning cached enum w/o seeking
     SegmentTermEnum enumerator = getEnum();
     if (enumerator.term() != null                 // term is at or past current
@@ -179,6 +185,7 @@
   final long getPosition(Term term) throws IOException {
     if (size == 0) return -1;
 
+    ensureIndexIsRead();
     int indexOffset = getIndexOffset(term);
     seekEnum(indexOffset);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to