(doris-thirdparty) branch clucene updated: [Fix](memory leak) fix memory leak found in fault injection case (#170)

jianliangqi Mon, 08 Jan 2024 03:14:08 -0800

This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git



The following commit(s) were added to refs/heads/clucene by this push:
     new d3bedb2d [Fix](memory leak) fix memory leak found in fault injection case (#170)
d3bedb2d is described below

commit d3bedb2d55484d80ae53e008287733f9d24ea72b
Author: airborne12 <[email protected]>
AuthorDate: Mon Jan 8 19:13:26 2024 +0800

    [Fix](memory leak) fix memory leak found in fault injection case (#170)
---
 src/core/CLucene/index/SegmentInfos.cpp    |   3 +
 src/core/CLucene/index/SegmentReader.cpp   |   7 +-
 src/core/CLucene/index/SegmentTermEnum.cpp | 801 ++++++++++++++---------------
 src/core/CLucene/index/TermInfosReader.cpp | 737 +++++++++++++-------------
 src/core/CLucene/index/_SegmentTermEnum.h  |   3 +-
 src/core/CLucene/store/IndexOutput.cpp     |  13 +-
 6 files changed, 778 insertions(+), 786 deletions(-)

diff --git a/src/core/CLucene/index/SegmentInfos.cpp b/src/core/CLucene/index/SegmentInfos.cpp
index 03532129..60a36954 100644
--- a/src/core/CLucene/index/SegmentInfos.cpp
+++ b/src/core/CLucene/index/SegmentInfos.cpp
@@ -826,6 +826,9 @@ string SegmentInfo::segString(Directory* dir) {
           // Try not to leave a truncated segments_N file in
           // the index:
           directory->deleteFile(segmentFileName.c_str());
+          if (output != nullptr) {
+              _CLDELETE(output);
+          }
         }
       )
     )
diff --git a/src/core/CLucene/index/SegmentReader.cpp b/src/core/CLucene/index/SegmentReader.cpp
index f7741a9f..72126366 100644
--- a/src/core/CLucene/index/SegmentReader.cpp
+++ b/src/core/CLucene/index/SegmentReader.cpp
@@ -257,7 +257,12 @@ SegmentReader *SegmentReader::get(Directory *dir, SegmentInfo *si,
     instance->init(dir, sis, closeDir);
     // TODO: make this configurable...
     bool fieldsReaderExist = false;
-    instance->initialize(si, readBufferSize == -1 ? BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores, fieldsReaderExist);
+    try {
+        instance->initialize(si, readBufferSize == -1 ? BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores, fieldsReaderExist);
+    } catch (CLuceneError& e) {
+        _CLDELETE(instance)
+        throw e;
+    }
     return instance;
 }
 
diff --git a/src/core/CLucene/index/SegmentTermEnum.cpp b/src/core/CLucene/index/SegmentTermEnum.cpp
index 574d9396..8179c7b7 100644
--- a/src/core/CLucene/index/SegmentTermEnum.cpp
+++ b/src/core/CLucene/index/SegmentTermEnum.cpp
@@ -5,424 +5,419 @@
 * the GNU Lesser General Public License, as specified in the COPYING file.
 
------------------------------------------------------------------------------*/
 #include "CLucene/_ApiHeader.h"
-#include "_SegmentHeader.h"
-#include "_SegmentTermEnum.h"
-
+#include "Term.h"
 #include "Terms.h"
 #include "_FieldInfos.h"
-#include "Term.h"
+#include "_SegmentHeader.h"
+#include "_SegmentTermEnum.h"
 #include "_TermInfo.h"
 #include "_TermInfosWriter.h"
 
 CL_NS_USE(store)
 CL_NS_DEF(index)
 
-       SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const 
bool isi, int32_t in_format):
-               fieldInfos(fis){
-       //Func - Constructor
-       //Pre  - i holds a reference to an instance of IndexInput
-       //       fis holds a reference to an instance of FieldInfos
-       //       isi
-       //Post - An instance of SegmentTermEnum has been created
-               input            = i;
-               position     = -1;
-               //Instantiate a Term with empty field, empty text and which is 
interned (see term.h what interned means)
-           _term         = _CLNEW Term;
-               isIndex      = isi;
-               termInfo     = _CLNEW TermInfo();
-               indexPointer = 0;
-               buffer       = NULL;
-               bufferLength = 0;
-               prev         = NULL;
-               formatM1SkipInterval = 0;
-               maxSkipLevels = 1;
-               
-               //Set isClone to false as the instance is not clone of another 
instance
-               isClone      = false;
-
-               int32_t firstInt = in_format == -4 ? in_format : 
input->readInt();
+SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const bool 
isi)
+        : fieldInfos(fis) {
+    //Func - Constructor
+    //Pre  - i holds a reference to an instance of IndexInput
+    //       fis holds a reference to an instance of FieldInfos
+    //       isi
+    //Post - An instance of SegmentTermEnum has been created
+    input = i;
+    position = -1;
+    //Instantiate a Term with empty field, empty text and which is interned 
(see term.h what interned means)
+    _term = _CLNEW Term;
+    isIndex = isi;
+    termInfo = _CLNEW TermInfo();
+    indexPointer = 0;
+    buffer = NULL;
+    bufferLength = 0;
+    prev = NULL;
+    formatM1SkipInterval = 0;
+    maxSkipLevels = 1;
+
+    //Set isClone to false as the instance is not clone of another instance
+    isClone = false;
+}
+
+void SegmentTermEnum::init(int32_t in_format) {
+    int32_t firstInt = in_format == -4 ? in_format : input->readInt();
 
     if (firstInt >= 0) {
-         // original-format file, without explicit format version number
-         format = 0;
-         size = firstInt;
+        // original-format file, without explicit format version number
+        format = 0;
+        size = firstInt;
 
-         // back-compatible settings
-         indexInterval = 128;
-         skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo 
optimization
+        // back-compatible settings
+        indexInterval = 128;
+        skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo 
optimization
 
-      } else {
-         // we have a format version number
-         format = firstInt;
+    } else {
+        // we have a format version number
+        format = firstInt;
 
-         // check that it is a format we can understand
-         if (format < TermInfosWriter::FORMAT){
+        // check that it is a format we can understand
+        if (format < TermInfosWriter::FORMAT) {
             TCHAR err[30];
-            _sntprintf(err,30,_T("Unknown format version: %d"), format);
-            _CLTHROWT(CL_ERR_CorruptIndex,err);
-         }
-
-                                if (format == -4) {
-                                               if (isIndex) {
-                                                       size = 
input->readLong();
-                                                       if (size < 0) {
-                                                               auto pos = 
input->getFilePointer();
-                                                               
input->seek(input->length() - 16);
-                                                               size = 
input->readLong();
-                                                               tisSize = 
input->readLong();
-                                                               
input->seek(pos);
-                                                       }
-                                                       
-                                                       indexInterval = 
input->readInt();
-                                                       skipInterval = 
input->readInt();
-                                                       maxSkipLevels = 
input->readInt();
-                                               }
-                                } else {
-                                               size = input->readLong();       
             // read the size
-                                               if (size < 0) {                 
             // read the size at file footer, if size < 0
-                                                               auto pos = 
input->getFilePointer();
-                                                               
input->seek(input->length() - 8);
-                                                               size = 
input->readLong();
-                                                               
input->seek(pos);
-                                               }
-                                               
-                                               if(format == -1){
-                                                               if (!isIndex) {
-                                                                       
indexInterval = input->readInt();
-                                                                       
formatM1SkipInterval = input->readInt();
-                                                               }
-                                                               // switch off 
skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in 
-                                                               // skipTo 
implementation of these versions
-                                                               skipInterval = 
LUCENE_INT32_MAX_SHOULDBE;
-                                               }else{
-                                                               indexInterval = 
input->readInt();
-                                                               skipInterval = 
input->readInt();
-                                                               if ( format == 
-3 ) {
-                                                                       // this 
new format introduces multi-level skipping
-                                                                       
maxSkipLevels = input->readInt();
-                                                               }
-                                               }
-                                }
-      }
-       }
-
-       SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone):
-               fieldInfos(clone.fieldInfos)
-       {
-       //Func - Constructor
-       //       The instance is created by cloning all properties of clone
-       //Pre  - clone holds a valid reference to SegmentTermEnum
-       //Post - An instance of SegmentTermEnum with the same properties as 
clone
-               
-               input            = clone.input->clone();
-               //Copy the postion from the clone
-               position     = clone.position;
-
-        if ( clone._term != NULL ){
-                       _term         = _CLNEW Term;
-                       _term->set(clone._term,clone._term->text());
-               }else
-                       _term = NULL;
-               isIndex      = clone.isIndex;
-               termInfo     = _CLNEW TermInfo(clone.termInfo);
-               indexPointer = clone.indexPointer;
-               buffer       = 
clone.buffer==NULL?NULL:(TCHAR*)malloc(sizeof(TCHAR) * (clone.bufferLength+1));
-               bufferLength = clone.bufferLength;
-               prev         = clone.prev==NULL?NULL:_CLNEW 
Term(clone.prev->field(),clone.prev->text(),false);
-               size         = clone.size;
-               tisSize     = clone.tisSize;
-
-      format       = clone.format;
-      indexInterval= clone.indexInterval;
-      skipInterval = clone.skipInterval;
-      formatM1SkipInterval = clone.formatM1SkipInterval;
-      maxSkipLevels = clone.maxSkipLevels;
-      
-               //Set isClone to true as this instance is a clone of another 
instance
-               isClone      = true;
-
-               //Copy the contents of buffer of clone to the buffer of this 
instance
-               if ( clone.buffer != NULL )
-                       memcpy(buffer,clone.buffer,bufferLength * 
sizeof(TCHAR));
-       }
-
-       SegmentTermEnum::~SegmentTermEnum(){
-       //Func - Destructor
-       //Pre  - true
-       //Post - The instance has been destroyed. If this instance was a clone
-       //       then the inputstream is closed and deleted too.
-
-        //todo: revisit this... close() should clean up most of everything.
-
-               //Finalize prev
-               _CLDECDELETE(prev );
-               //Finalize term
-               _CLDECDELETE( _term );
-               
-
-               //Delete the buffer if necessary
-               if ( buffer != NULL ) free(buffer);
-               //Delete termInfo if necessary
-               _CLDELETE(termInfo);
-
-               //Check if this instance is a clone
-               if ( isClone ){
-                       //Close the inputstream
-                       input->close();
-                       //delete the inputstream
-                       _CLDELETE(input);
-                       }
-       }
-
-       void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
-               if (format == -4) {
-                       size = tii->tisSize;
-                       indexInterval = tii->indexInterval;
-                       skipInterval = tii->skipInterval;
-                       maxSkipLevels = tii->maxSkipLevels;
-                       size_t header = sizeof(format) +
-                                                                               
        sizeof(size) +
-                                                                               
        sizeof(indexInterval) + 
-                                                                               
        sizeof(skipInterval) + 
-                                                                               
        sizeof(maxSkipLevels);
-                       input->seek(header);
-               }
-       }
-
-       const char* SegmentTermEnum::getObjectName() const{ return 
getClassName(); }
-       const char* SegmentTermEnum::getClassName(){ return "SegmentTermEnum"; }
-
-       bool SegmentTermEnum::next(){
-       //Func - Moves the current of the set to the next in the set
-       //Pre  - true
-       //Post - If the end has been reached NULL is returned otherwise the 
term has
-       //       become the next Term in the enumeration
-
-               //Increase position by and and check if the end has been reached
-               if (position++ >= size-1) {
-                       //delete term
-                       _CLDECDELETE(_term);
-                       return false;
-               }
-
-               //delete the previous enumerated term
-               Term* tmp=NULL;
-               if ( prev != NULL ){
-                       if ( _LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1 ){
-                               _CLDECDELETE(prev); //todo: tune other places 
try and delete its term 
-                       }else
-                               tmp = prev; //we are going to re-use this term
-               }
-               //prev becomes the current enumerated term
-               prev = _term;
-               //term becomes the next term read from inputStream input
-               _term = readTerm(tmp);
-
-               //Read docFreq, the number of documents which contain the term.
-               termInfo->docFreq = input->readVInt();
-               //Read freqPointer, a pointer into the TermFreqs file (.frq)
-               termInfo->freqPointer += input->readVLong();
-               
-               //Read proxPointer, a pointer into the TermPosition file (.prx).
-               termInfo->proxPointer += input->readVLong();
-
-      if(format == -1){
-         //  just read skipOffset in order to increment  file pointer; 
-         // value is never used since skipTo is switched off
-         if (!isIndex) {
+            _sntprintf(err, 30, _T("Unknown format version: %d"), format);
+            _CLTHROWT(CL_ERR_CorruptIndex, err);
+        }
+
+        if (format == -4) {
+            if (isIndex) {
+                size = input->readLong();
+                if (size < 0) {
+                    auto pos = input->getFilePointer();
+                    input->seek(input->length() - 16);
+                    size = input->readLong();
+                    tisSize = input->readLong();
+                    input->seek(pos);
+                }
+
+                indexInterval = input->readInt();
+                skipInterval = input->readInt();
+                maxSkipLevels = input->readInt();
+            }
+        } else {
+            size = input->readLong(); // read the size
+            if (size < 0) {           // read the size at file footer, if size 
< 0
+                auto pos = input->getFilePointer();
+                input->seek(input->length() - 8);
+                size = input->readLong();
+                input->seek(pos);
+            }
+
+            if (format == -1) {
+                if (!isIndex) {
+                    indexInterval = input->readInt();
+                    formatM1SkipInterval = input->readInt();
+                }
+                // switch off skipTo optimization for file format prior to 
1.4rc2 in order to avoid a bug in
+                // skipTo implementation of these versions
+                skipInterval = LUCENE_INT32_MAX_SHOULDBE;
+            } else {
+                indexInterval = input->readInt();
+                skipInterval = input->readInt();
+                if (format == -3) {
+                    // this new format introduces multi-level skipping
+                    maxSkipLevels = input->readInt();
+                }
+            }
+        }
+    }
+}
+
+SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone) : 
fieldInfos(clone.fieldInfos) {
+    //Func - Constructor
+    //       The instance is created by cloning all properties of clone
+    //Pre  - clone holds a valid reference to SegmentTermEnum
+    //Post - An instance of SegmentTermEnum with the same properties as clone
+
+    input = clone.input->clone();
+    //Copy the postion from the clone
+    position = clone.position;
+
+    if (clone._term != NULL) {
+        _term = _CLNEW Term;
+        _term->set(clone._term, clone._term->text());
+    } else
+        _term = NULL;
+    isIndex = clone.isIndex;
+    termInfo = _CLNEW TermInfo(clone.termInfo);
+    indexPointer = clone.indexPointer;
+    buffer = clone.buffer == NULL ? NULL : (TCHAR*)malloc(sizeof(TCHAR) * 
(clone.bufferLength + 1));
+    bufferLength = clone.bufferLength;
+    prev = clone.prev == NULL ? NULL : _CLNEW Term(clone.prev->field(), 
clone.prev->text(), false);
+    size = clone.size;
+    tisSize = clone.tisSize;
+
+    format = clone.format;
+    indexInterval = clone.indexInterval;
+    skipInterval = clone.skipInterval;
+    formatM1SkipInterval = clone.formatM1SkipInterval;
+    maxSkipLevels = clone.maxSkipLevels;
+
+    //Set isClone to true as this instance is a clone of another instance
+    isClone = true;
+
+    //Copy the contents of buffer of clone to the buffer of this instance
+    if (clone.buffer != NULL) memcpy(buffer, clone.buffer, bufferLength * 
sizeof(TCHAR));
+}
+
+SegmentTermEnum::~SegmentTermEnum() {
+    //Func - Destructor
+    //Pre  - true
+    //Post - The instance has been destroyed. If this instance was a clone
+    //       then the inputstream is closed and deleted too.
+
+    //todo: revisit this... close() should clean up most of everything.
+
+    //Finalize prev
+    _CLDECDELETE(prev);
+    //Finalize term
+    _CLDECDELETE(_term);
+
+    //Delete the buffer if necessary
+    if (buffer != NULL) free(buffer);
+    //Delete termInfo if necessary
+    _CLDELETE(termInfo);
+
+    //Check if this instance is a clone
+    if (isClone) {
+        //Close the inputstream
+        input->close();
+        //delete the inputstream
+        _CLDELETE(input);
+    }
+}
+
+void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
+    if (format == -4) {
+        size = tii->tisSize;
+        indexInterval = tii->indexInterval;
+        skipInterval = tii->skipInterval;
+        maxSkipLevels = tii->maxSkipLevels;
+        size_t header = sizeof(format) + sizeof(size) + sizeof(indexInterval) +
+                        sizeof(skipInterval) + sizeof(maxSkipLevels);
+        input->seek(header);
+    }
+}
+
+const char* SegmentTermEnum::getObjectName() const {
+    return getClassName();
+}
+const char* SegmentTermEnum::getClassName() {
+    return "SegmentTermEnum";
+}
+
+bool SegmentTermEnum::next() {
+    //Func - Moves the current of the set to the next in the set
+    //Pre  - true
+    //Post - If the end has been reached NULL is returned otherwise the term 
has
+    //       become the next Term in the enumeration
+
+    //Increase position by and and check if the end has been reached
+    if (position++ >= size - 1) {
+        //delete term
+        _CLDECDELETE(_term);
+        return false;
+    }
+
+    //delete the previous enumerated term
+    Term* tmp = NULL;
+    if (prev != NULL) {
+        if (_LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1) {
+            _CLDECDELETE(prev); //todo: tune other places try and delete its 
term
+        } else
+            tmp = prev; //we are going to re-use this term
+    }
+    //prev becomes the current enumerated term
+    prev = _term;
+    //term becomes the next term read from inputStream input
+    _term = readTerm(tmp);
+
+    //Read docFreq, the number of documents which contain the term.
+    termInfo->docFreq = input->readVInt();
+    //Read freqPointer, a pointer into the TermFreqs file (.frq)
+    termInfo->freqPointer += input->readVLong();
+
+    //Read proxPointer, a pointer into the TermPosition file (.prx).
+    termInfo->proxPointer += input->readVLong();
+
+    if (format == -1) {
+        //  just read skipOffset in order to increment  file pointer;
+        // value is never used since skipTo is switched off
+        if (!isIndex) {
             if (termInfo->docFreq > formatM1SkipInterval) {
-               termInfo->skipOffset = input->readVInt(); 
+                termInfo->skipOffset = input->readVInt();
             }
-         }
-      }else{
-         if (termInfo->docFreq >= skipInterval) 
-            termInfo->skipOffset = input->readVInt();
-      }
-
-               //Check if the enumeration is an index
-               if (isIndex)
-                       //read index pointer
-                       indexPointer += input->readVLong();
-
-               return true;
-       }
-
-       Term* SegmentTermEnum::term(bool pointer) {
-               if ( pointer )
-                       return _CL_POINTER(_term);
-               else
-                       return _term;
-       }
-
-       void SegmentTermEnum::scanTo(const Term *term){
-       //Func - Scan for Term without allocating new Terms
-       //Pre  - term != NULL
-       //Post - The iterator term has been moved to the position where Term is 
expected to be
-       //       in the enumeration
-               while ( term->compareTo(this->_term) > 0 && next()) 
-               {
-               }
-       }
-
-       void SegmentTermEnum::close() {
-       //Func - Closes the enumeration to further activity, freeing resources.
-       //Pre  - true
-       //Post - The inputStream input has been closed
-
-                       input->close();
-       }
-
-       int32_t SegmentTermEnum::docFreq() const {
-       //Func - Returns the document frequency of the current term in the set
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post  - The document frequency of the current enumerated term has 
been returned
-
-               return termInfo->docFreq;
-       }
-
-       void SegmentTermEnum::seek(const int64_t pointer, const int32_t p, 
Term* t, TermInfo* ti) {
-       //Func - Repositions term and termInfo within the enumeration
-       //Pre  - pointer >= 0
-       //       p >= 0 and contains the new position within the enumeration
-       //       t is a valid reference to a Term and is the new current term 
in the enumeration
-       //       ti is a valid reference to a TermInfo and is corresponding 
TermInfo form the new
-       //       current Term
-       //Post - term and terminfo have been repositioned within the enumeration
-
-               //Reset the IndexInput input to pointer
-               input->seek(pointer);
-               //Assign the new position
-               position = p;
-
-               //finalize the current term
-               if ( _term == NULL || 
_LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1 ){
-                       _CLDECDELETE(_term);
-                       //Get a pointer from t and increase the reference 
counter of t
-                       _term = _CLNEW Term; //cannot use reference, because 
TermInfosReader uses non ref-counted array
-               }
-               _term->set(t,t->text());
-
-               //finalize prev
-               _CLDECDELETE(prev);
-
-               //Change the current termInfo so it matches the new current term
-               termInfo->set(ti);
-
-               //Have the buffer grown if needed
-               if ( bufferLength <= _term->textLength() )
-                       growBuffer(_term->textLength(), true );           // 
copy term text into buffer
-               else
-                       _tcsncpy(buffer,_term->text(),bufferLength); //just 
copy the buffer
-       }
-
-       TermInfo* SegmentTermEnum::getTermInfo()const {
-       //Func - Returns a clone of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - A clone of the current termInfo has been returned
-
-               return _CLNEW TermInfo(*termInfo); //clone
-       }
-
-       void SegmentTermEnum::getTermInfo(TermInfo* ti)const {
-       //Func - Retrieves a clone of termInfo through the reference ti
-       //Pre  - ti contains a valid reference to TermInfo
-       //       termInfo != NULL
-       //       next() must have been called once
-       //Post - ti contains a clone of termInfo
-
-               ti->set(termInfo);
-       }
-
-       int64_t SegmentTermEnum::freqPointer()const {
-       //Func - Returns the freqpointer of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - The freqpointer of the current termInfo has been returned
-
-               return termInfo->freqPointer;
-       }
-
-       int64_t SegmentTermEnum::proxPointer()const {
-       //Func - Returns the proxPointer of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - the proxPointer of the current termInfo has been returned
-
-               return termInfo->proxPointer;
-       }
-
-       SegmentTermEnum* SegmentTermEnum::clone() const {
-       //Func - Returns a clone of this instance
-       //Pre  - true
-       //Post - An clone of this instance has been returned
-
-               return _CLNEW SegmentTermEnum(*this);
-       }
-
-       Term* SegmentTermEnum::readTerm(Term* reuse) {
-       //Func - Reads the next term in the enumeration
-       //Pre  - true
-       //Post - The next Term in the enumeration has been read and returned
-
-               //Read the start position from the inputStream input
-               int32_t start = input->readVInt();
-               //Read the length of term in the inputStream input
-               int32_t length = input->readVInt();
-
-               //Calculated the total lenght of bytes that buffer must be to 
contain the current
-               //chars in buffer and the new ones yet to be read
-               uint32_t totalLength = start + length;
-
-               if (static_cast<uint32_t>(bufferLength) < totalLength+1)
-                       growBuffer(totalLength, false); //dont copy the buffer 
over.
-
-               //Read a length number of characters into the buffer from 
position start in the inputStream input
-               input->readChars(buffer, start, length);
-               //Null terminate the string
-               buffer[totalLength] = 0;
-
-               //Return a new Term     
-               int32_t field = input->readVInt();
-               const TCHAR* fieldname = fieldInfos->fieldName(field);
-               if ( reuse == NULL )
-                       reuse = _CLNEW Term;
-
-               reuse->set(fieldname, buffer, false);
-               return reuse;
-       }
-
-       void SegmentTermEnum::growBuffer(const uint32_t length, bool 
force_copy) {
-       //Func - Instantiate a buffer of length length+1
-       //Pre  - length > 0
-       //Post - pre(buffer) has been deleted with its contents. A new buffer
-       //       has been allocated of length length+1 and the text of term has 
been copied
-       //       to buffer
-               //todo: we could guess that we will need to re-grow this
-               //buffer a few times...so start off with a reasonable grow
-               //value...
-               if ( bufferLength > length )
-                       return;
-
-        //Store the new bufferLength
-               if ( length - bufferLength < 8 )
-                       bufferLength = length+8;
-               else
-                       bufferLength = length+1;
-
-               bool copy = buffer==NULL;
-
-               //Instantiate the new buffer + 1 is needed for terminator '\0'
-               if ( buffer == NULL )
-                       buffer = (TCHAR*)malloc(sizeof(TCHAR) * 
(bufferLength+1));
-               else
-                       buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) * 
(bufferLength+1));
-
-               if ( copy || force_copy){
-                       //Copy the text of term into buffer
-                       _tcsncpy(buffer,_term->text(),bufferLength);
-               }
-       }
+        }
+    } else {
+        if (termInfo->docFreq >= skipInterval) termInfo->skipOffset = 
input->readVInt();
+    }
+
+    //Check if the enumeration is an index
+    if (isIndex)
+        //read index pointer
+        indexPointer += input->readVLong();
+
+    return true;
+}
+
+Term* SegmentTermEnum::term(bool pointer) {
+    if (pointer)
+        return _CL_POINTER(_term);
+    else
+        return _term;
+}
+
+void SegmentTermEnum::scanTo(const Term* term) {
+    //Func - Scan for Term without allocating new Terms
+    //Pre  - term != NULL
+    //Post - The iterator term has been moved to the position where Term is 
expected to be
+    //       in the enumeration
+    while (term->compareTo(this->_term) > 0 && next()) {
+    }
+}
+
+void SegmentTermEnum::close() {
+    //Func - Closes the enumeration to further activity, freeing resources.
+    //Pre  - true
+    //Post - The inputStream input has been closed
+
+    input->close();
+}
+
+int32_t SegmentTermEnum::docFreq() const {
+    //Func - Returns the document frequency of the current term in the set
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post  - The document frequency of the current enumerated term has been 
returned
+
+    return termInfo->docFreq;
+}
+
+void SegmentTermEnum::seek(const int64_t pointer, const int32_t p, Term* t, TermInfo* ti) {
+    //Func - Repositions term and termInfo within the enumeration
+    //Pre  - pointer >= 0
+    //       p is the new position (TermInfosReader::seekEnum passes -1 for index offset 0)
+    //       t is a valid reference to a Term and is the new current term in the enumeration
+    //       ti is a valid reference to a TermInfo and is the corresponding TermInfo for the new
+    //       current Term
+    //Post - term and terminfo have been repositioned within the enumeration
+
+    //Reset the IndexInput input to pointer
+    input->seek(pointer);
+    //Assign the new position
+    position = p;
+
+    //finalize the current term
+    if (_term == NULL || _LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1) {
+        _CLDECDELETE(_term);
+        //Allocate a fresh Term; its contents are copied from t just below
+        _term = _CLNEW
+                Term; //cannot use reference, because TermInfosReader uses non ref-counted array
+    }
+    _term->set(t, t->text());
+
+    //finalize prev
+    _CLDECDELETE(prev);
+
+    //Change the current termInfo so it matches the new current term
+    termInfo->set(ti);
+
+    //Have the buffer grown if needed
+    if (bufferLength <= _term->textLength())
+        growBuffer(_term->textLength(), true); // copy term text into buffer
+    else
+        _tcsncpy(buffer, _term->text(), bufferLength); //just copy the buffer
+}
+
+TermInfo* SegmentTermEnum::getTermInfo() const {
+    //Func - Returns a clone of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - A clone of the current termInfo has been returned
+    //Note - the clone is allocated with _CLNEW; TermInfosReader::terms deletes it via _CLLDELETE
+    return _CLNEW TermInfo(*termInfo); //clone
+}
+
+void SegmentTermEnum::getTermInfo(TermInfo* ti) const {
+    //Func - Copies the current termInfo into the caller-supplied ti via ti->set()
+    //Pre  - ti contains a valid reference to TermInfo
+    //       termInfo != NULL
+    //       next() must have been called once
+    //Post - ti contains a copy of termInfo
+
+    ti->set(termInfo);
+}
+
+int64_t SegmentTermEnum::freqPointer() const {
+    //Func - Returns the freqPointer of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - The freqPointer of the current termInfo has been returned
+
+    return termInfo->freqPointer;
+}
+
+int64_t SegmentTermEnum::proxPointer() const {
+    //Func - Returns the proxPointer of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - The proxPointer of the current termInfo has been returned
+
+    return termInfo->proxPointer;
+}
+
+SegmentTermEnum* SegmentTermEnum::clone() const {
+    //Func - Returns a clone of this instance, built with the copy constructor
+    //Pre  - true
+    //Post - A clone of this instance has been returned
+
+    return _CLNEW SegmentTermEnum(*this);
+}
+
+Term* SegmentTermEnum::readTerm(Term* reuse) {
+    //Func - Reads the next term in the enumeration
+    //Pre  - true
+    //Post - The next Term in the enumeration has been read and returned
+
+    //Read the start position from the inputStream input
+    int32_t start = input->readVInt();
+    //Read the length of term in the inputStream input
+    int32_t length = input->readVInt();
+
+    //Calculate the total length in characters that buffer must have to contain the current
+    //chars in buffer and the new ones yet to be read
+    uint32_t totalLength = start + length;
+
+    if (static_cast<uint32_t>(bufferLength) < totalLength + 1)
+        growBuffer(totalLength, false); //dont copy the buffer over.
+
+    //Read a length number of characters into the buffer from position start in the inputStream input
+    input->readChars(buffer, start, length);
+    //Null terminate the string
+    buffer[totalLength] = 0;
+
+    //Return a new Term
+    int32_t field = input->readVInt();
+    const TCHAR* fieldname = fieldInfos->fieldName(field);
+    if (reuse == NULL) reuse = _CLNEW Term;
+
+    reuse->set(fieldname, buffer, false);
+    return reuse;
+}
+
+void SegmentTermEnum::growBuffer(const uint32_t length, bool force_copy) {
+    //Func - Make sure buffer can hold more than length characters
+    //Pre  - length > 0
+    //Post - If bufferLength was already > length nothing has changed. Otherwise buffer
+    //       has been malloc'ed (when NULL) or realloc'ed to bufferLength+1 characters and
+    //       the text of term has been copied to it when it was NULL or force_copy is set
+    //todo: we could guess that we will need to re-grow this
+    //buffer a few times...so start off with a reasonable grow
+    //value...
+    if (bufferLength > length) return;
+
+    //Store the new bufferLength
+    if (length - bufferLength < 8)
+        bufferLength = length + 8;
+    else
+        bufferLength = length + 1;
+
+    bool copy = buffer == NULL;
+
+    //Instantiate the new buffer + 1 is needed for terminator '\0'
+    if (buffer == NULL)
+        buffer = (TCHAR*)malloc(sizeof(TCHAR) * (bufferLength + 1));
+    else
+        buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) * (bufferLength + 1));
+
+    if (copy || force_copy) {
+        //Copy the text of term into buffer
+        _tcsncpy(buffer, _term->text(), bufferLength);
+    }
+}
 
 CL_NS_END
diff --git a/src/core/CLucene/index/TermInfosReader.cpp 
b/src/core/CLucene/index/TermInfosReader.cpp
index 7996d4d4..6cf8b42f 100644
--- a/src/core/CLucene/index/TermInfosReader.cpp
+++ b/src/core/CLucene/index/TermInfosReader.cpp
@@ -24,207 +24,206 @@ CL_NS_USE(store)
 CL_NS_USE(util)
 CL_NS_DEF(index)
 
-
-  TermInfosReader::TermInfosReader(Directory* dir, const char* seg, 
FieldInfos* fis, const int32_t readBufferSize):
-      directory (dir),fieldInfos (fis), indexTerms(NULL), indexInfos(NULL), 
indexPointers(NULL), indexDivisor(1)
-  {
-  //Func - Constructor.
-  //       Reads the TermInfos file (.tis) and eventually the Term Info Index 
file (.tii)
-  //Pre  - dir is a reference to a valid Directory
-  //       Fis contains a valid reference to an FieldInfos instance
-  //       seg != NULL and contains the name of the segment
-  //Post - An instance has been created and the index named seg has been read. 
(Remember
-  //       a segment is nothing more then an independently readable index)
-
-      CND_PRECONDITION(seg != NULL, "seg is NULL");
-
-         //Initialize the name of the segment
-      segment    =  seg;
-
-      //Create a filname fo a Term Info File
-         string tisFile = Misc::segmentname(segment,".tis");
-         string tiiFile = Misc::segmentname(segment,".tii");
-         bool success = false;
+TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos* fis,
+                                 const int32_t readBufferSize)
+        : directory(dir),
+          fieldInfos(fis),
+          indexTerms(NULL),
+          indexInfos(NULL),
+          indexPointers(NULL),
+          indexDivisor(1) {
+    //Func - Constructor.
+    //       Reads the TermInfos file (.tis) and eventually the Term Info Index file (.tii)
+    //Pre  - dir is a reference to a valid Directory
+    //       Fis contains a valid reference to an FieldInfos instance
+    //       seg != NULL and contains the name of the segment
+    //Post - An instance has been created and the index named seg has been read. (Remember
+    //       a segment is nothing more than an independently readable index)
+
+    CND_PRECONDITION(seg != NULL, "seg is NULL");
+
+    //Initialize the name of the segment
+    segment = seg;
+
+    //Create the file names of the Term Info File and the Term Info Index file
+    string tisFile = Misc::segmentname(segment, ".tis");
+    string tiiFile = Misc::segmentname(segment, ".tii");
+    bool success = false;
     origEnum = indexEnum = NULL;
     _size = indexTermsLength = totalIndexInterval = 0;
-          indexIsRead = false;
-
-         try {
-      //Create an SegmentTermEnum for storing all the terms read of the segment
-
-      // tii
-      auto tiiStream = directory->openInput( tiiFile.c_str(), readBufferSize );
-      indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true, -1);
-      CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");
-
-      // tis
-      auto tisStream = directory->openInput( tisFile.c_str(), readBufferSize );
-      origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false, indexEnum->getFormat());
-      origEnum->initByTii(indexEnum);
-      CND_CONDITION(origEnum != NULL, "No memory could be allocated for index enumerator");
-      _size = origEnum->size;
-      totalIndexInterval = origEnum->indexInterval;
-
-      //call ensureIndexIsRead to load data to memory right now
-      ensureIndexIsRead();
-
-      success = true;
-         } _CLFINALLY({
-                 // With lock-less commits, it's entirely possible (and
-                 // fine) to hit a FileNotFound exception above. In
-                 // this case, we want to explicitly close any subset
-                 // of things that were opened so that we don't have to
-                 // wait for a GC to do so.
-                 if (!success) {
-                         close();
-                 }
-         });
-
-  }
-
-  TermInfosReader::~TermInfosReader(){
-  //Func - Destructor
-  //Pre  - true
-  //Post - The instance has been destroyed
-
-      //Close the TermInfosReader to be absolutly sure that enumerator has been closed
-         //and the arrays indexTerms, indexPointers and indexInfos and  their elements
-         //have been destroyed
-      close();
-  }
-  int32_t TermInfosReader::getSkipInterval() const {
+    indexIsRead = false;
+
+    try {
+        //Create an SegmentTermEnum for storing all the terms read of the segment
+
+        // tii
+        auto tiiStream = directory->openInput(tiiFile.c_str(), readBufferSize);
+        indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true);
+        CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");
+        indexEnum->init(-1);
+
+        // tis
+        auto tisStream = directory->openInput(tisFile.c_str(), readBufferSize);
+        origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false);
+        CND_CONDITION(origEnum != NULL, "No memory could be allocated for index enumerator");
+        origEnum->init(indexEnum->getFormat());
+        origEnum->initByTii(indexEnum);
+        _size = origEnum->size;
+        totalIndexInterval = origEnum->indexInterval;
+
+        //call ensureIndexIsRead to load data to memory right now
+        ensureIndexIsRead();
+
+        success = true;
+    }
+    _CLFINALLY({
+        // With lock-less commits, it's entirely possible (and
+        // fine) to hit a FileNotFound exception above. In
+        // this case, we want to explicitly close any subset
+        // of things that were opened so that we don't have to
+        // wait for a GC to do so.
+        if (!success) {
+            close();
+        }
+    });
+}
+
+TermInfosReader::~TermInfosReader() {
+    //Func - Destructor
+    //Pre  - true
+    //Post - The instance has been destroyed
+
+    //Close the TermInfosReader to be absolutely sure that enumerator has been closed
+    //and the arrays indexTerms, indexPointers and indexInfos and their elements
+    //have been destroyed
+    close();
+}
+int32_t TermInfosReader::getSkipInterval() const {
     return origEnum->skipInterval;
-  }
+}
 
-  int32_t TermInfosReader::getMaxSkipLevels() const {
+int32_t TermInfosReader::getMaxSkipLevels() const {
     return origEnum->maxSkipLevels;
-  }
-
-  void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
-         if (indexDivisor < 1)
-                 _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must be > 0");
+}
 
-         if (indexTerms != NULL)
-                 _CLTHROWA(CL_ERR_IllegalArgument, "index terms are already 
loaded");
+void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
+    if (_indexDivisor < 1) _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must be > 0");
 
-         this->indexDivisor = _indexDivisor;
-         totalIndexInterval = origEnum->indexInterval * _indexDivisor;
-  }
+    if (indexTerms != NULL) _CLTHROWA(CL_ERR_IllegalArgument, "index terms are already loaded");
 
-  int32_t TermInfosReader::getIndexDivisor() const { return indexDivisor; }
-  void TermInfosReader::close() {
+    this->indexDivisor = _indexDivisor;
+    totalIndexInterval = origEnum->indexInterval * _indexDivisor;
+}
 
-         //Check if indexTerms and indexInfos exist
-     if (indexTerms && indexInfos){
-          //Iterate through arrays indexTerms and indexPointer to
-             //destroy their elements
+int32_t TermInfosReader::getIndexDivisor() const {
+    return indexDivisor;
+}
+void TermInfosReader::close() {
+    //Check if indexTerms and indexInfos exist
+    if (indexTerms && indexInfos) {
+        //Iterate through arrays indexTerms and indexPointer to
+        //destroy their elements
 #ifdef _DEBUG
-         for ( int32_t i=0; i<indexTermsLength;++i ){
+        for (int32_t i = 0; i < indexTermsLength; ++i) {
             indexTerms[i].__cl_refcount--;
-         }
+        }
 #endif
-     }
-         //Delete the arrays
-     if (indexTerms){
-         delete [] indexTerms;
-        indexTerms = NULL;
-     }
-     if (indexInfos){
-         _CLDELETE_ARRAY(indexInfos);
-        indexInfos = NULL;
-     }
-
-      //Delete the arrays
-     if (indexPointers) {
+    }
+    //Delete the arrays
+    if (indexTerms) {
+        delete[] indexTerms;
+        indexTerms = NULL;
+    }
+    if (indexInfos) {
+        _CLDELETE_ARRAY(indexInfos);
+        indexInfos = NULL;
+    }
+
+    //Delete the arrays
+    if (indexPointers) {
         _CLDELETE_ARRAY(indexPointers);
         indexPointers = NULL;
-     }
+    }
 
-      if (origEnum != NULL){
+    if (origEnum != NULL) {
         origEnum->close();
 
-           //Get a pointer to IndexInput used by the enumeration but
-           //instantiated in the constructor by directory.open( tisFile )
-        IndexInput *is = origEnum->input;
+        //Get a pointer to IndexInput used by the enumeration but
+        //instantiated in the constructor by directory.open( tisFile )
+        IndexInput* is = origEnum->input;
 
         //Delete the enumuration enumerator
         _CLDELETE(origEnum);
 
         //Delete the IndexInput
         _CLDELETE(is);
-      }
+    }
 
-      if (indexEnum != NULL){
+    if (indexEnum != NULL) {
         indexEnum->close();
 
-           //Get a pointer to IndexInput used by the enumeration but
-           //instantiated in the constructor by directory.open( tiiFile )
-        IndexInput *is = indexEnum->input;
+        //Get a pointer to IndexInput used by the enumeration but
+        //instantiated in the constructor by directory.open( tiiFile )
+        IndexInput* is = indexEnum->input;
 
         //Delete the enumuration enumerator
         _CLDELETE(indexEnum);
-       indexEnum = NULL;
+        indexEnum = NULL;
 
         //Delete the IndexInput
         _CLDELETE(is);
-      }
-         enumerators.setNull();
-  }
-
-  int64_t TermInfosReader::size() const{
-  //Func - Return the size of the enumeration of TermInfos
-  //Pre  - true
-  //Post - size has been returened
+    }
+    enumerators.setNull();
+}
 
-      return _size;
-  }
+int64_t TermInfosReader::size() const {
+    //Func - Return the size of the enumeration of TermInfos
+    //Pre  - true
+    //Post - size has been returned
+
+    return _size;
+}
 
-  Term* TermInfosReader::get(const int32_t position) {
-  //Func - Returns the nth term in the set
-  //Pre  - position > = 0
-  //Post - The n-th term in the set has been returned
+Term* TermInfosReader::get(const int32_t position) {
+    //Func - Returns the nth term in the set
+    //Pre  - position >= 0
+    //Post - The n-th term in the set has been returned
 
-         //Check if the size is 0 because then there are no terms
-      if (_size == 0)
-          return NULL;
+    //Check if the size is 0 because then there are no terms
+    if (_size == 0) return NULL;
 
-         SegmentTermEnum* enumerator = getEnum();
+    SegmentTermEnum* enumerator = getEnum();
 
-         if (
-             enumerator != NULL //an enumeration exists
-             && enumerator->term(false) != NULL // term is at or past current
-             && position >= enumerator->position
-                 && position < (enumerator->position + totalIndexInterval)
-            )
-         {
-                 return scanEnum(position);                      // can avoid seek
-         }
+    if (enumerator != NULL                 //an enumeration exists
+        && enumerator->term(false) != NULL // term is at or past current
+        && position >= enumerator->position &&
+        position < (enumerator->position + totalIndexInterval)) {
+        return scanEnum(position); // can avoid seek
+    }
 
     //random-access: must seek
     seekEnum(position / totalIndexInterval);
 
-       //Get the Term at position
+    //Get the Term at position
     return scanEnum(position);
-  }
+}
 
-  SegmentTermEnum* TermInfosReader::getEnum(){
+SegmentTermEnum* TermInfosReader::getEnum() {
     SegmentTermEnum* termEnum = enumerators.get();
-    if (termEnum == NULL){
-      termEnum = terms();
-      enumerators.set(termEnum);
+    if (termEnum == NULL) {
+        termEnum = terms();
+        enumerators.set(termEnum);
     }
     return termEnum;
-  }
+}
 
-  TermInfo* TermInfosReader::get(const Term* term){
-  //Func - Returns a TermInfo for a term
-  //Pre  - term holds a valid reference to term
-  //Post - if term can be found its TermInfo has been returned otherwise NULL
+TermInfo* TermInfosReader::get(const Term* term) {
+    //Func - Returns a TermInfo for a term
+    //Pre  - term holds a valid reference to term
+    //Post - if term can be found its TermInfo has been returned otherwise NULL
 
     //If the size of the enumeration is 0 then no Terms have been read
-       if (_size == 0)
-               return NULL;
+    if (_size == 0) return NULL;
 
     ensureIndexIsRead();
 
@@ -233,250 +232,238 @@ CL_NS_DEF(index)
 
     // optimize sequential access: first try scanning cached enumerator w/o 
seeking
     if (
-             //the current term of the enumeration enumerator is not at the 
end AND
-       enumerator->term(false) != NULL  &&
-       (
-            //there exists a previous current called prev and term is 
positioned after this prev OR
-            ( enumerator->prev != NULL && term->compareTo(enumerator->prev) > 
0) ||
-            //term is positioned at the same position as the current of 
enumerator or at a higher position
-            term->compareTo(enumerator->term(false)) >= 0 )
-       )
-     {
-
-               //Calculate the offset for the position
-               int32_t _enumOffset = 
(int32_t)(enumerator->position/totalIndexInterval)+1;
-
-               // but before end of block
-               if (
-                       //the length of indexTerms (the number of terms in 
enumerator) equals
-                       //_enum_offset OR
-                       indexTermsLength == _enumOffset  ||
-                       //term is positioned in front of term found at 
_enumOffset in indexTerms
-                       term->compareTo(&indexTerms[_enumOffset]) < 0){
-
-                       //no need to seek, retrieve the TermInfo for term
-                       return scanEnum(term);
+            //the current term of the enumeration enumerator is not at the end 
AND
+            enumerator->term(false) != NULL &&
+            (
+                    //there exists a previous current called prev and term is 
positioned after this prev OR
+                    (enumerator->prev != NULL && 
term->compareTo(enumerator->prev) > 0) ||
+                    //term is positioned at the same position as the current 
of enumerator or at a higher position
+                    term->compareTo(enumerator->term(false)) >= 0)) {
+        //Calculate the offset for the position
+        int32_t _enumOffset = (int32_t)(enumerator->position / 
totalIndexInterval) + 1;
+
+        // but before end of block
+        if (
+                //the length of indexTerms (the number of terms in enumerator) 
equals
+                //_enum_offset OR
+                indexTermsLength == _enumOffset ||
+                //term is positioned in front of term found at _enumOffset in 
indexTerms
+                term->compareTo(&indexTerms[_enumOffset]) < 0) {
+            //no need to seek, retrieve the TermInfo for term
+            return scanEnum(term);
         }
     }
 
     //Reposition current term in the enumeration
     seekEnum(getIndexOffset(term));
-       //Return the TermInfo for term
+    //Return the TermInfo for term
     return scanEnum(term);
-  }
-
+}
 
-  int64_t TermInfosReader::getPosition(const Term* term) {
-  //Func - Returns the position of a Term in the set
-  //Pre  - term holds a valid reference to a Term
-  //       enumerator != NULL
-  //Post - If term was found then its position is returned otherwise -1
+int64_t TermInfosReader::getPosition(const Term* term) {
+    //Func - Returns the position of a Term in the set
+    //Pre  - term holds a valid reference to a Term
+    //       enumerator != NULL
+    //Post - If term was found then its position is returned otherwise -1
 
-         //if the enumeration is empty then return -1
-         if (_size == 0)
-                 return -1;
+    //if the enumeration is empty then return -1
+    if (_size == 0) return -1;
 
-         ensureIndexIsRead();
-
-      //Retrieve the indexOffset for term
-      int32_t indexOffset = getIndexOffset(term);
-      seekEnum(indexOffset);
+    ensureIndexIsRead();
 
-         SegmentTermEnum* enumerator = getEnum();
+    //Retrieve the indexOffset for term
+    int32_t indexOffset = getIndexOffset(term);
+    seekEnum(indexOffset);
 
-      while(term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) {}
+    SegmentTermEnum* enumerator = getEnum();
 
-         if ( term->equals(enumerator->term(false)) ){
-          return enumerator->position;
-         }else
-          return -1;
-  }
+    while (term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) {
+    }
 
-  SegmentTermEnum* TermInfosReader::terms(const Term* term) {
-  //Func - Returns an enumeration of terms starting at or after the named term.
-  //       If term is null then enumerator is set to the beginning
-  //Pre  - term holds a valid reference to a Term
-  //       enumerator != NULL
-  //Post - An enumeration of terms starting at or after the named term has been returned
+    if (enumerator->term(false) != NULL && term->equals(enumerator->term(false))) {
+        return enumerator->position;
+    } else
+        return -1;
+}
+
+SegmentTermEnum* TermInfosReader::terms(const Term* term) {
+    //Func - Returns an enumeration of terms starting at or after the named term.
+    //       If term is null then enumerator is set to the beginning
+    //Pre  - term is either NULL or a valid reference to a Term
+    //       enumerator != NULL
+    //Post - An enumeration of terms starting at or after the named term has been returned
+
+    SegmentTermEnum* enumerator = NULL;
+    if (term != NULL) {
+        //Seek enumerator to term; delete the new TermInfo that's returned.
+        TermInfo* ti = get(term);
+        _CLLDELETE(ti);
+        enumerator = getEnum();
+    } else
+        enumerator = origEnum;
+
+    //Clone the entire enumeration
+    SegmentTermEnum* cln = enumerator->clone();
+
+    //Check if cln points to a valid instance
+    CND_CONDITION(cln != NULL, "cln is NULL");
+
+    return cln;
+}
+
+void TermInfosReader::ensureIndexIsRead() {
+    //Func - Reads the term info index file or .tii file.
+    //       This file contains every IndexInterval-th entry from the .tis file,
+    //       along with its location in the "tis" file. This is designed to be read entirely
+    //       into memory and used to provide random access to the "tis" file.
+    //Pre  - indexTerms    = NULL
+    //       indexInfos    = NULL
+    //       indexPointers = NULL
+    //Post - The term info index file has been read into memory
 
-         SegmentTermEnum* enumerator = NULL;
-         if ( term != NULL ){
-               //Seek enumerator to term; delete the new TermInfo that's returned.
-               TermInfo* ti = get(term);
-               _CLLDELETE(ti);
-               enumerator = getEnum();
-         }else
-           enumerator = origEnum;
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
 
-      //Clone the entire enumeration
-      SegmentTermEnum* cln = enumerator->clone();
+    if (indexIsRead) return;
+
+    //https://jira.qianxin-inc.cn/browse/XHBUG-2921
+    //https://jira.qianxin-inc.cn/browse/XHBUG-3053
+    if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
+
+    try {
+        indexTermsLength = (size_t)indexEnum->size;
+
+        //Instantiate an block of Term's,so that each one doesn't have to be new'd
+        indexTerms = new Term[indexTermsLength];
+        CND_CONDITION(
+                indexTerms != NULL,
+                "No memory could be allocated for indexTerms"); //Check if is indexTerms is a valid array
+
+        //Instantiate an big block of TermInfo's, so that each one doesn't have to be new'd
+        indexInfos = _CL_NEWARRAY(TermInfo, indexTermsLength);
+        CND_CONDITION(
+                indexInfos != NULL,
+                "No memory could be allocated for indexInfos"); //Check if is indexInfos is a valid array
+
+        //Instantiate an array indexPointers that contains pointers to the term info index file
+        indexPointers = _CL_NEWARRAY(int64_t, indexTermsLength);
+        CND_CONDITION(
+                indexPointers != NULL,
+                "No memory could be allocated for indexPointers"); //Check if is indexPointers is a valid array
+
+        //Iterate through the terms of indexEnum
+        for (int32_t i = 0; indexEnum->next(); ++i) {
+            indexTerms[i].set(indexEnum->term(false), indexEnum->term(false)->text());
+            indexEnum->getTermInfo(&indexInfos[i]);
+            indexPointers[i] = indexEnum->indexPointer;
+
+            for (int32_t j = 1; j < indexDivisor; j++)
+                if (!indexEnum->next()) break;
+        }
+        indexIsRead = true;
+    }
+    _CLFINALLY(indexEnum->close();
+               //Close and delete the IndexInput is. The close is done by the destructor.
+               _CLDELETE(indexEnum->input); _CLDELETE(indexEnum); indexEnum = NULL;);
+}
+
+int32_t TermInfosReader::getIndexOffset(const Term* term) {
+    //Func - Returns the offset of the greatest index entry which is less than or equal to term.
+    //Pre  - term holds a reference to a valid term
+    //       indexTerms != NULL
+    //Post - The new offset has been returned
+
+    //Check if is indexTerms is a valid array
+    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+
+    int32_t lo = 0;
+    int32_t hi = indexTermsLength - 1;
+    int32_t mid;
+    int32_t delta;
+
+    while (hi >= lo) {
+        //Start in the middle between hi and lo (written so that lo + hi cannot overflow)
+        mid = lo + ((hi - lo) >> 1);
+
+        //Check that mid is inside the array before taking the address of indexTerms[mid]
+        CND_PRECONDITION(mid < indexTermsLength, "mid >= indexTermsLength");
+        CND_PRECONDITION(&indexTerms[mid] != NULL, "indexTerms[mid] is NULL");
+
+        //Determine if term is before mid or after mid
+        delta = term->compareTo(&indexTerms[mid]);
+        if (delta < 0) {
+            //Calculate the new hi
+            hi = mid - 1;
+        } else if (delta > 0) {
+            //Calculate the new lo
+            lo = mid + 1;
+        } else {
+            //term has been found so return its position
+            return mid;
+        }
+    }
+    // the new starting offset
+    return hi;
+}
+
+void TermInfosReader::seekEnum(const int32_t indexOffset) {
+    //Func - Reposition the current Term and TermInfo to indexOffset
+    //Pre  - indexOffset >= 0
+    //       indexTerms    != NULL
+    //       indexInfos    != NULL
+    //       indexPointers != NULL
+    //Post - The current Term and Terminfo have been repositioned to indexOffset
+
+    CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative number");
+    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+    CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
+    CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
 
-      //Check if cln points to a valid instance
-      CND_CONDITION(cln != NULL,"cln is NULL");
+    SegmentTermEnum* enumerator = getEnum();
+    enumerator->seek(indexPointers[indexOffset], (indexOffset * totalIndexInterval) - 1,
+                     &indexTerms[indexOffset], &indexInfos[indexOffset]);
+}
+
+TermInfo* TermInfosReader::scanEnum(const Term* term) {
+    //Func - Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
+    //       The search is started from the current term.
+    //Pre  - term contains a valid reference to a Term
+    //       enumerator != NULL
+    //Post - if term has been found the corresponding TermInfo has been returned otherwise NULL
+    //       has been returned
 
-      return cln;
-  }
+    SegmentTermEnum* enumerator = getEnum();
+    enumerator->scanTo(term);
+
+    //Check whether the Term term can be found at the current position
+    if (enumerator->term(false) != NULL && term->equals(enumerator->term(false))) {
+        //Return the TermInfo instance about term
+        return enumerator->getTermInfo();
+    } else {
+        //term was not found so no TermInfo can be returned
+        return NULL;
+    }
+}
 
+Term* TermInfosReader::scanEnum(const int32_t position) {
+    //Func - Scans the enumeration to the requested position and returns the
+    //       Term located at that position
+    //Pre  - position > = 0
+    //       enumerator != NULL
+    //Post - The Term at the requested position has been returned
 
-  void TermInfosReader::ensureIndexIsRead() {
-  //Func - Reads the term info index file or .tti file.
-  //       This file contains every IndexInterval-th entry from the .tis file,
-  //       along with its location in the "tis" file. This is designed to be 
read entirely
-  //       into memory and used to provide random access to the "tis" file.
-  //Pre  - indexTerms    = NULL
-  //       indexInfos    = NULL
-  //       indexPointers = NULL
-  //Post - The term info index file has been read into memory
+    SegmentTermEnum* enumerator = getEnum();
 
-    SCOPED_LOCK_MUTEX(THIS_LOCK)
+    //As long the position of the enumeration enumerator is smaller than the 
requested one
+    while (enumerator->position < position) {
+        //Move the current of enumerator to the next
+        if (!enumerator->next()) {
+            //If there is no next it means that the requested position was to 
big
+            return NULL;
+        }
+    }
 
-         if (indexIsRead)
-                 return;
-
-      //https://jira.qianxin-inc.cn/browse/XHBUG-2921
-      //https://jira.qianxin-inc.cn/browse/XHBUG-3053
-      if (indexEnum == NULL)
-         _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
-
-      try {
-          indexTermsLength = (size_t)indexEnum->size;
-
-                     //Instantiate an block of Term's,so that each one doesn't 
have to be new'd
-          indexTerms    = new Term[indexTermsLength];
-          CND_CONDITION(indexTerms != NULL,"No memory could be allocated for 
indexTerms");//Check if is indexTerms is a valid array
-
-                 //Instantiate an big block of TermInfo's, so that each one 
doesn't have to be new'd
-          indexInfos    = _CL_NEWARRAY(TermInfo,indexTermsLength);
-          CND_CONDITION(indexInfos != NULL,"No memory could be allocated for 
indexInfos"); //Check if is indexInfos is a valid array
-
-          //Instantiate an array indexPointers that contains pointers to the 
term info index file
-          indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);
-          CND_CONDITION(indexPointers != NULL,"No memory could be allocated 
for indexPointers");//Check if is indexPointers is a valid array
-
-                 //Iterate through the terms of indexEnum
-          for (int32_t i = 0; indexEnum->next(); ++i){
-              
indexTerms[i].set(indexEnum->term(false),indexEnum->term(false)->text());
-              indexEnum->getTermInfo(&indexInfos[i]);
-              indexPointers[i] = indexEnum->indexPointer;
-
-                               for (int32_t j = 1; j < indexDivisor; j++)
-                                       if (!indexEnum->next())
-                                               break;
-          }
-         indexIsRead = true;
-    }_CLFINALLY(
-          indexEnum->close();
-                 //Close and delete the IndexInput is. The close is done by the destructor.
-          _CLDELETE( indexEnum->input );
-          _CLDELETE( indexEnum );
-         indexEnum = NULL;
-    );
-  }
-
-
-  int32_t TermInfosReader::getIndexOffset(const Term* term){
-  //Func - Returns the offset of the greatest index entry which is less than or equal to term.
-  //Pre  - term holds a reference to a valid term
-  //       indexTerms != NULL
-  //Post - The new offset has been returned
-
-      //Check if is indexTerms is a valid array
-      CND_PRECONDITION(indexTerms != NULL,"indexTerms is NULL");
-
-      int32_t lo = 0;
-      int32_t hi = indexTermsLength - 1;
-         int32_t mid;
-         int32_t delta;
-
-      while (hi >= lo) {
-          //Start in the middle betwee hi and lo
-          mid = (lo + hi) >> 1;
-
-          //Check if is indexTerms[mid] is a valid instance of Term
-          CND_PRECONDITION(&indexTerms[mid] != NULL,"indexTerms[mid] is NULL");
-          CND_PRECONDITION(mid < indexTermsLength,"mid >= indexTermsLength");
-
-                 //Determine if term is before mid or after mid
-          delta = term->compareTo(&indexTerms[mid]);
-          if (delta < 0){
-              //Calculate the new hi
-              hi = mid - 1;
-          }else if (delta > 0){
-                  //Calculate the new lo
-                  lo = mid + 1;
-                         }else{
-                  //term has been found so return its position
-                  return mid;
-          }
-     }
-     // the new starting offset
-     return hi;
-  }
-
-  void TermInfosReader::seekEnum(const int32_t indexOffset) {
-  //Func - Reposition the current Term and TermInfo to indexOffset
-  //Pre  - indexOffset >= 0
-  //       indexTerms    != NULL
-  //       indexInfos    != NULL
-  //       indexPointers != NULL
-  //Post - The current Term and Terminfo have been repositioned to indexOffset
-
-      CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative number");
-      CND_PRECONDITION(indexTerms != NULL,    "indexTerms is NULL");
-      CND_PRECONDITION(indexInfos != NULL,    "indexInfos is NULL");
-      CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
-
-         SegmentTermEnum* enumerator =  getEnum();
-         enumerator->seek(
-          indexPointers[indexOffset],
-                 (indexOffset * totalIndexInterval) - 1,
-          &indexTerms[indexOffset],
-                 &indexInfos[indexOffset]
-             );
-  }
-
-
-  TermInfo* TermInfosReader::scanEnum(const Term* term) {
-  //Func - Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
-  //       The search is started from the current term.
-  //Pre  - term contains a valid reference to a Term
-  //       enumerator != NULL
-  //Post - if term has been found the corresponding TermInfo has been returned otherwise NULL
-  //       has been returned
-
-      SegmentTermEnum* enumerator = getEnum();
-         enumerator->scanTo(term);
-
-      //Check if the at the position the Term term can be found
-         if (enumerator->term(false) != NULL && term->equals(enumerator->term(false)) ){
-                 //Return the TermInfo instance about term
-          return enumerator->getTermInfo();
-     }else{
-          //term was not found so no TermInfo can be returned
-          return NULL;
-     }
-  }
-
-  Term* TermInfosReader::scanEnum(const int32_t position) {
-  //Func - Scans the enumeration to the requested position and returns the
-  //       Term located at that position
-  //Pre  - position > = 0
-  //       enumerator != NULL
-  //Post - The Term at the requested position has been returned
-
-      SegmentTermEnum* enumerator = getEnum();
-
-         //As long the position of the enumeration enumerator is smaller than the requested one
-      while(enumerator->position < position){
-                 //Move the current of enumerator to the next
-                 if (!enumerator->next()){
-                         //If there is no next it means that the requested position was to big
-              return NULL;
-          }
-         }
-
-         //Return the Term a the requested position
-         return enumerator->term();
-  }
+    //Return the Term a the requested position
+    return enumerator->term();
+}
 
 CL_NS_END
diff --git a/src/core/CLucene/index/_SegmentTermEnum.h b/src/core/CLucene/index/_SegmentTermEnum.h
index b5fa419d..3dd2c8c5 100644
--- a/src/core/CLucene/index/_SegmentTermEnum.h
+++ b/src/core/CLucene/index/_SegmentTermEnum.h
@@ -55,7 +55,8 @@ protected:
 
 public:
        ///Constructor
-       SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const bool isi, int32_t in_format = -1);
+       SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const bool isi);
+        void init(int32_t in_format = -1);
 
        ///Destructor
        ~SegmentTermEnum();
diff --git a/src/core/CLucene/store/IndexOutput.cpp b/src/core/CLucene/store/IndexOutput.cpp
index 05e7695f..77c37400 100644
--- a/src/core/CLucene/store/IndexOutput.cpp
+++ b/src/core/CLucene/store/IndexOutput.cpp
@@ -35,12 +35,13 @@ CL_NS_DEF(store)
                close();
   }
 
-  void BufferedIndexOutput::close(){
-    flush();
-    _CLDELETE_ARRAY( buffer );
-
-    bufferStart = 0;
-    bufferPosition = 0;
+  void BufferedIndexOutput::close() {
+      // flush may throw error here, if we do not delete buffer for all circumstances,
+      // we may close again in destructor above, that would cause pure virtual function call for flushBuffer
+      try {
+          flush();
+      }
+      _CLFINALLY(_CLDELETE_ARRAY(buffer); bufferStart = 0; bufferPosition = 0;)
   }
 
   void BufferedIndexOutput::writeByte(const uint8_t b) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris-thirdparty) branch clucene updated: [Fix](memory leak) fix memory leak found in fault injection case (#170)

Reply via email to