This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new d3bedb2d [Fix](memory leak) fix memory leak found in fault injection
case (#170)
d3bedb2d is described below
commit d3bedb2d55484d80ae53e008287733f9d24ea72b
Author: airborne12 <[email protected]>
AuthorDate: Mon Jan 8 19:13:26 2024 +0800
[Fix](memory leak) fix memory leak found in fault injection case (#170)
---
src/core/CLucene/index/SegmentInfos.cpp | 3 +
src/core/CLucene/index/SegmentReader.cpp | 7 +-
src/core/CLucene/index/SegmentTermEnum.cpp | 801 ++++++++++++++---------------
src/core/CLucene/index/TermInfosReader.cpp | 737 +++++++++++++-------------
src/core/CLucene/index/_SegmentTermEnum.h | 3 +-
src/core/CLucene/store/IndexOutput.cpp | 13 +-
6 files changed, 778 insertions(+), 786 deletions(-)
diff --git a/src/core/CLucene/index/SegmentInfos.cpp
b/src/core/CLucene/index/SegmentInfos.cpp
index 03532129..60a36954 100644
--- a/src/core/CLucene/index/SegmentInfos.cpp
+++ b/src/core/CLucene/index/SegmentInfos.cpp
@@ -826,6 +826,9 @@ string SegmentInfo::segString(Directory* dir) {
// Try not to leave a truncated segments_N file in
// the index:
directory->deleteFile(segmentFileName.c_str());
+ if (output != nullptr) {
+ _CLDELETE(output);
+ }
}
)
)
diff --git a/src/core/CLucene/index/SegmentReader.cpp
b/src/core/CLucene/index/SegmentReader.cpp
index f7741a9f..72126366 100644
--- a/src/core/CLucene/index/SegmentReader.cpp
+++ b/src/core/CLucene/index/SegmentReader.cpp
@@ -257,7 +257,12 @@ SegmentReader *SegmentReader::get(Directory *dir,
SegmentInfo *si,
instance->init(dir, sis, closeDir);
// TODO: make this configurable...
bool fieldsReaderExist = false;
- instance->initialize(si, readBufferSize == -1 ?
BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores,
fieldsReaderExist);
+ try {
+ instance->initialize(si, readBufferSize == -1 ?
BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores,
fieldsReaderExist);
+ } catch (CLuceneError& e) {
+ _CLDELETE(instance)
+ throw e;
+ }
return instance;
}
diff --git a/src/core/CLucene/index/SegmentTermEnum.cpp
b/src/core/CLucene/index/SegmentTermEnum.cpp
index 574d9396..8179c7b7 100644
--- a/src/core/CLucene/index/SegmentTermEnum.cpp
+++ b/src/core/CLucene/index/SegmentTermEnum.cpp
@@ -5,424 +5,419 @@
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/_ApiHeader.h"
-#include "_SegmentHeader.h"
-#include "_SegmentTermEnum.h"
-
+#include "Term.h"
#include "Terms.h"
#include "_FieldInfos.h"
-#include "Term.h"
+#include "_SegmentHeader.h"
+#include "_SegmentTermEnum.h"
#include "_TermInfo.h"
#include "_TermInfosWriter.h"
CL_NS_USE(store)
CL_NS_DEF(index)
- SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const
bool isi, int32_t in_format):
- fieldInfos(fis){
- //Func - Constructor
- //Pre - i holds a reference to an instance of IndexInput
- // fis holds a reference to an instance of FieldInfos
- // isi
- //Post - An instance of SegmentTermEnum has been created
- input = i;
- position = -1;
- //Instantiate a Term with empty field, empty text and which is
interned (see term.h what interned means)
- _term = _CLNEW Term;
- isIndex = isi;
- termInfo = _CLNEW TermInfo();
- indexPointer = 0;
- buffer = NULL;
- bufferLength = 0;
- prev = NULL;
- formatM1SkipInterval = 0;
- maxSkipLevels = 1;
-
- //Set isClone to false as the instance is not clone of another
instance
- isClone = false;
-
- int32_t firstInt = in_format == -4 ? in_format :
input->readInt();
+SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const bool
isi)
+ : fieldInfos(fis) {
+ //Func - Constructor
+ //Pre - i holds a reference to an instance of IndexInput
+ // fis holds a reference to an instance of FieldInfos
+ // isi
+ //Post - An instance of SegmentTermEnum has been created
+ input = i;
+ position = -1;
+ //Instantiate a Term with empty field, empty text and which is interned
(see term.h what interned means)
+ _term = _CLNEW Term;
+ isIndex = isi;
+ termInfo = _CLNEW TermInfo();
+ indexPointer = 0;
+ buffer = NULL;
+ bufferLength = 0;
+ prev = NULL;
+ formatM1SkipInterval = 0;
+ maxSkipLevels = 1;
+
+ //Set isClone to false as the instance is not clone of another instance
+ isClone = false;
+}
+
+void SegmentTermEnum::init(int32_t in_format) {
+ int32_t firstInt = in_format == -4 ? in_format : input->readInt();
if (firstInt >= 0) {
- // original-format file, without explicit format version number
- format = 0;
- size = firstInt;
+ // original-format file, without explicit format version number
+ format = 0;
+ size = firstInt;
- // back-compatible settings
- indexInterval = 128;
- skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo
optimization
+ // back-compatible settings
+ indexInterval = 128;
+ skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo
optimization
- } else {
- // we have a format version number
- format = firstInt;
+ } else {
+ // we have a format version number
+ format = firstInt;
- // check that it is a format we can understand
- if (format < TermInfosWriter::FORMAT){
+ // check that it is a format we can understand
+ if (format < TermInfosWriter::FORMAT) {
TCHAR err[30];
- _sntprintf(err,30,_T("Unknown format version: %d"), format);
- _CLTHROWT(CL_ERR_CorruptIndex,err);
- }
-
- if (format == -4) {
- if (isIndex) {
- size =
input->readLong();
- if (size < 0) {
- auto pos =
input->getFilePointer();
-
input->seek(input->length() - 16);
- size =
input->readLong();
- tisSize =
input->readLong();
-
input->seek(pos);
- }
-
- indexInterval =
input->readInt();
- skipInterval =
input->readInt();
- maxSkipLevels =
input->readInt();
- }
- } else {
- size = input->readLong();
// read the size
- if (size < 0) {
// read the size at file footer, if size < 0
- auto pos =
input->getFilePointer();
-
input->seek(input->length() - 8);
- size =
input->readLong();
-
input->seek(pos);
- }
-
- if(format == -1){
- if (!isIndex) {
-
indexInterval = input->readInt();
-
formatM1SkipInterval = input->readInt();
- }
- // switch off
skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
- // skipTo
implementation of these versions
- skipInterval =
LUCENE_INT32_MAX_SHOULDBE;
- }else{
- indexInterval =
input->readInt();
- skipInterval =
input->readInt();
- if ( format ==
-3 ) {
- // this
new format introduces multi-level skipping
-
maxSkipLevels = input->readInt();
- }
- }
- }
- }
- }
-
- SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone):
- fieldInfos(clone.fieldInfos)
- {
- //Func - Constructor
- // The instance is created by cloning all properties of clone
- //Pre - clone holds a valid reference to SegmentTermEnum
- //Post - An instance of SegmentTermEnum with the same properties as
clone
-
- input = clone.input->clone();
- //Copy the postion from the clone
- position = clone.position;
-
- if ( clone._term != NULL ){
- _term = _CLNEW Term;
- _term->set(clone._term,clone._term->text());
- }else
- _term = NULL;
- isIndex = clone.isIndex;
- termInfo = _CLNEW TermInfo(clone.termInfo);
- indexPointer = clone.indexPointer;
- buffer =
clone.buffer==NULL?NULL:(TCHAR*)malloc(sizeof(TCHAR) * (clone.bufferLength+1));
- bufferLength = clone.bufferLength;
- prev = clone.prev==NULL?NULL:_CLNEW
Term(clone.prev->field(),clone.prev->text(),false);
- size = clone.size;
- tisSize = clone.tisSize;
-
- format = clone.format;
- indexInterval= clone.indexInterval;
- skipInterval = clone.skipInterval;
- formatM1SkipInterval = clone.formatM1SkipInterval;
- maxSkipLevels = clone.maxSkipLevels;
-
- //Set isClone to true as this instance is a clone of another
instance
- isClone = true;
-
- //Copy the contents of buffer of clone to the buffer of this
instance
- if ( clone.buffer != NULL )
- memcpy(buffer,clone.buffer,bufferLength *
sizeof(TCHAR));
- }
-
- SegmentTermEnum::~SegmentTermEnum(){
- //Func - Destructor
- //Pre - true
- //Post - The instance has been destroyed. If this instance was a clone
- // then the inputstream is closed and deleted too.
-
- //todo: revisit this... close() should clean up most of everything.
-
- //Finalize prev
- _CLDECDELETE(prev );
- //Finalize term
- _CLDECDELETE( _term );
-
-
- //Delete the buffer if necessary
- if ( buffer != NULL ) free(buffer);
- //Delete termInfo if necessary
- _CLDELETE(termInfo);
-
- //Check if this instance is a clone
- if ( isClone ){
- //Close the inputstream
- input->close();
- //delete the inputstream
- _CLDELETE(input);
- }
- }
-
- void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
- if (format == -4) {
- size = tii->tisSize;
- indexInterval = tii->indexInterval;
- skipInterval = tii->skipInterval;
- maxSkipLevels = tii->maxSkipLevels;
- size_t header = sizeof(format) +
-
sizeof(size) +
-
sizeof(indexInterval) +
-
sizeof(skipInterval) +
-
sizeof(maxSkipLevels);
- input->seek(header);
- }
- }
-
- const char* SegmentTermEnum::getObjectName() const{ return
getClassName(); }
- const char* SegmentTermEnum::getClassName(){ return "SegmentTermEnum"; }
-
- bool SegmentTermEnum::next(){
- //Func - Moves the current of the set to the next in the set
- //Pre - true
- //Post - If the end has been reached NULL is returned otherwise the
term has
- // become the next Term in the enumeration
-
- //Increase position by and and check if the end has been reached
- if (position++ >= size-1) {
- //delete term
- _CLDECDELETE(_term);
- return false;
- }
-
- //delete the previous enumerated term
- Term* tmp=NULL;
- if ( prev != NULL ){
- if ( _LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1 ){
- _CLDECDELETE(prev); //todo: tune other places
try and delete its term
- }else
- tmp = prev; //we are going to re-use this term
- }
- //prev becomes the current enumerated term
- prev = _term;
- //term becomes the next term read from inputStream input
- _term = readTerm(tmp);
-
- //Read docFreq, the number of documents which contain the term.
- termInfo->docFreq = input->readVInt();
- //Read freqPointer, a pointer into the TermFreqs file (.frq)
- termInfo->freqPointer += input->readVLong();
-
- //Read proxPointer, a pointer into the TermPosition file (.prx).
- termInfo->proxPointer += input->readVLong();
-
- if(format == -1){
- // just read skipOffset in order to increment file pointer;
- // value is never used since skipTo is switched off
- if (!isIndex) {
+ _sntprintf(err, 30, _T("Unknown format version: %d"), format);
+ _CLTHROWT(CL_ERR_CorruptIndex, err);
+ }
+
+ if (format == -4) {
+ if (isIndex) {
+ size = input->readLong();
+ if (size < 0) {
+ auto pos = input->getFilePointer();
+ input->seek(input->length() - 16);
+ size = input->readLong();
+ tisSize = input->readLong();
+ input->seek(pos);
+ }
+
+ indexInterval = input->readInt();
+ skipInterval = input->readInt();
+ maxSkipLevels = input->readInt();
+ }
+ } else {
+ size = input->readLong(); // read the size
+ if (size < 0) { // read the size at file footer, if size
< 0
+ auto pos = input->getFilePointer();
+ input->seek(input->length() - 8);
+ size = input->readLong();
+ input->seek(pos);
+ }
+
+ if (format == -1) {
+ if (!isIndex) {
+ indexInterval = input->readInt();
+ formatM1SkipInterval = input->readInt();
+ }
+ // switch off skipTo optimization for file format prior to
1.4rc2 in order to avoid a bug in
+ // skipTo implementation of these versions
+ skipInterval = LUCENE_INT32_MAX_SHOULDBE;
+ } else {
+ indexInterval = input->readInt();
+ skipInterval = input->readInt();
+ if (format == -3) {
+ // this new format introduces multi-level skipping
+ maxSkipLevels = input->readInt();
+ }
+ }
+ }
+ }
+}
+
+SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone) :
fieldInfos(clone.fieldInfos) {
+ //Func - Constructor
+ // The instance is created by cloning all properties of clone
+ //Pre - clone holds a valid reference to SegmentTermEnum
+ //Post - An instance of SegmentTermEnum with the same properties as clone
+
+ input = clone.input->clone();
+ //Copy the postion from the clone
+ position = clone.position;
+
+ if (clone._term != NULL) {
+ _term = _CLNEW Term;
+ _term->set(clone._term, clone._term->text());
+ } else
+ _term = NULL;
+ isIndex = clone.isIndex;
+ termInfo = _CLNEW TermInfo(clone.termInfo);
+ indexPointer = clone.indexPointer;
+ buffer = clone.buffer == NULL ? NULL : (TCHAR*)malloc(sizeof(TCHAR) *
(clone.bufferLength + 1));
+ bufferLength = clone.bufferLength;
+ prev = clone.prev == NULL ? NULL : _CLNEW Term(clone.prev->field(),
clone.prev->text(), false);
+ size = clone.size;
+ tisSize = clone.tisSize;
+
+ format = clone.format;
+ indexInterval = clone.indexInterval;
+ skipInterval = clone.skipInterval;
+ formatM1SkipInterval = clone.formatM1SkipInterval;
+ maxSkipLevels = clone.maxSkipLevels;
+
+ //Set isClone to true as this instance is a clone of another instance
+ isClone = true;
+
+ //Copy the contents of buffer of clone to the buffer of this instance
+ if (clone.buffer != NULL) memcpy(buffer, clone.buffer, bufferLength *
sizeof(TCHAR));
+}
+
+SegmentTermEnum::~SegmentTermEnum() {
+ //Func - Destructor
+ //Pre - true
+ //Post - The instance has been destroyed. If this instance was a clone
+ // then the inputstream is closed and deleted too.
+
+ //todo: revisit this... close() should clean up most of everything.
+
+ //Finalize prev
+ _CLDECDELETE(prev);
+ //Finalize term
+ _CLDECDELETE(_term);
+
+ //Delete the buffer if necessary
+ if (buffer != NULL) free(buffer);
+ //Delete termInfo if necessary
+ _CLDELETE(termInfo);
+
+ //Check if this instance is a clone
+ if (isClone) {
+ //Close the inputstream
+ input->close();
+ //delete the inputstream
+ _CLDELETE(input);
+ }
+}
+
+void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
+ if (format == -4) {
+ size = tii->tisSize;
+ indexInterval = tii->indexInterval;
+ skipInterval = tii->skipInterval;
+ maxSkipLevels = tii->maxSkipLevels;
+ size_t header = sizeof(format) + sizeof(size) + sizeof(indexInterval) +
+ sizeof(skipInterval) + sizeof(maxSkipLevels);
+ input->seek(header);
+ }
+}
+
+const char* SegmentTermEnum::getObjectName() const {
+ return getClassName();
+}
+const char* SegmentTermEnum::getClassName() {
+ return "SegmentTermEnum";
+}
+
+bool SegmentTermEnum::next() {
+ //Func - Moves the current of the set to the next in the set
+ //Pre - true
+ //Post - If the end has been reached NULL is returned otherwise the term
has
+ // become the next Term in the enumeration
+
+ //Increase position by and and check if the end has been reached
+ if (position++ >= size - 1) {
+ //delete term
+ _CLDECDELETE(_term);
+ return false;
+ }
+
+ //delete the previous enumerated term
+ Term* tmp = NULL;
+ if (prev != NULL) {
+ if (_LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1) {
+ _CLDECDELETE(prev); //todo: tune other places try and delete its
term
+ } else
+ tmp = prev; //we are going to re-use this term
+ }
+ //prev becomes the current enumerated term
+ prev = _term;
+ //term becomes the next term read from inputStream input
+ _term = readTerm(tmp);
+
+ //Read docFreq, the number of documents which contain the term.
+ termInfo->docFreq = input->readVInt();
+ //Read freqPointer, a pointer into the TermFreqs file (.frq)
+ termInfo->freqPointer += input->readVLong();
+
+ //Read proxPointer, a pointer into the TermPosition file (.prx).
+ termInfo->proxPointer += input->readVLong();
+
+ if (format == -1) {
+ // just read skipOffset in order to increment file pointer;
+ // value is never used since skipTo is switched off
+ if (!isIndex) {
if (termInfo->docFreq > formatM1SkipInterval) {
- termInfo->skipOffset = input->readVInt();
+ termInfo->skipOffset = input->readVInt();
}
- }
- }else{
- if (termInfo->docFreq >= skipInterval)
- termInfo->skipOffset = input->readVInt();
- }
-
- //Check if the enumeration is an index
- if (isIndex)
- //read index pointer
- indexPointer += input->readVLong();
-
- return true;
- }
-
- Term* SegmentTermEnum::term(bool pointer) {
- if ( pointer )
- return _CL_POINTER(_term);
- else
- return _term;
- }
-
- void SegmentTermEnum::scanTo(const Term *term){
- //Func - Scan for Term without allocating new Terms
- //Pre - term != NULL
- //Post - The iterator term has been moved to the position where Term is
expected to be
- // in the enumeration
- while ( term->compareTo(this->_term) > 0 && next())
- {
- }
- }
-
- void SegmentTermEnum::close() {
- //Func - Closes the enumeration to further activity, freeing resources.
- //Pre - true
- //Post - The inputStream input has been closed
-
- input->close();
- }
-
- int32_t SegmentTermEnum::docFreq() const {
- //Func - Returns the document frequency of the current term in the set
- //Pre - termInfo != NULL
- // next() must have been called once
- //Post - The document frequency of the current enumerated term has
been returned
-
- return termInfo->docFreq;
- }
-
- void SegmentTermEnum::seek(const int64_t pointer, const int32_t p,
Term* t, TermInfo* ti) {
- //Func - Repositions term and termInfo within the enumeration
- //Pre - pointer >= 0
- // p >= 0 and contains the new position within the enumeration
- // t is a valid reference to a Term and is the new current term
in the enumeration
- // ti is a valid reference to a TermInfo and is corresponding
TermInfo form the new
- // current Term
- //Post - term and terminfo have been repositioned within the enumeration
-
- //Reset the IndexInput input to pointer
- input->seek(pointer);
- //Assign the new position
- position = p;
-
- //finalize the current term
- if ( _term == NULL ||
_LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1 ){
- _CLDECDELETE(_term);
- //Get a pointer from t and increase the reference
counter of t
- _term = _CLNEW Term; //cannot use reference, because
TermInfosReader uses non ref-counted array
- }
- _term->set(t,t->text());
-
- //finalize prev
- _CLDECDELETE(prev);
-
- //Change the current termInfo so it matches the new current term
- termInfo->set(ti);
-
- //Have the buffer grown if needed
- if ( bufferLength <= _term->textLength() )
- growBuffer(_term->textLength(), true ); //
copy term text into buffer
- else
- _tcsncpy(buffer,_term->text(),bufferLength); //just
copy the buffer
- }
-
- TermInfo* SegmentTermEnum::getTermInfo()const {
- //Func - Returns a clone of the current termInfo
- //Pre - termInfo != NULL
- // next() must have been called once
- //Post - A clone of the current termInfo has been returned
-
- return _CLNEW TermInfo(*termInfo); //clone
- }
-
- void SegmentTermEnum::getTermInfo(TermInfo* ti)const {
- //Func - Retrieves a clone of termInfo through the reference ti
- //Pre - ti contains a valid reference to TermInfo
- // termInfo != NULL
- // next() must have been called once
- //Post - ti contains a clone of termInfo
-
- ti->set(termInfo);
- }
-
- int64_t SegmentTermEnum::freqPointer()const {
- //Func - Returns the freqpointer of the current termInfo
- //Pre - termInfo != NULL
- // next() must have been called once
- //Post - The freqpointer of the current termInfo has been returned
-
- return termInfo->freqPointer;
- }
-
- int64_t SegmentTermEnum::proxPointer()const {
- //Func - Returns the proxPointer of the current termInfo
- //Pre - termInfo != NULL
- // next() must have been called once
- //Post - the proxPointer of the current termInfo has been returned
-
- return termInfo->proxPointer;
- }
-
- SegmentTermEnum* SegmentTermEnum::clone() const {
- //Func - Returns a clone of this instance
- //Pre - true
- //Post - An clone of this instance has been returned
-
- return _CLNEW SegmentTermEnum(*this);
- }
-
- Term* SegmentTermEnum::readTerm(Term* reuse) {
- //Func - Reads the next term in the enumeration
- //Pre - true
- //Post - The next Term in the enumeration has been read and returned
-
- //Read the start position from the inputStream input
- int32_t start = input->readVInt();
- //Read the length of term in the inputStream input
- int32_t length = input->readVInt();
-
- //Calculated the total lenght of bytes that buffer must be to
contain the current
- //chars in buffer and the new ones yet to be read
- uint32_t totalLength = start + length;
-
- if (static_cast<uint32_t>(bufferLength) < totalLength+1)
- growBuffer(totalLength, false); //dont copy the buffer
over.
-
- //Read a length number of characters into the buffer from
position start in the inputStream input
- input->readChars(buffer, start, length);
- //Null terminate the string
- buffer[totalLength] = 0;
-
- //Return a new Term
- int32_t field = input->readVInt();
- const TCHAR* fieldname = fieldInfos->fieldName(field);
- if ( reuse == NULL )
- reuse = _CLNEW Term;
-
- reuse->set(fieldname, buffer, false);
- return reuse;
- }
-
- void SegmentTermEnum::growBuffer(const uint32_t length, bool
force_copy) {
- //Func - Instantiate a buffer of length length+1
- //Pre - length > 0
- //Post - pre(buffer) has been deleted with its contents. A new buffer
- // has been allocated of length length+1 and the text of term has
been copied
- // to buffer
- //todo: we could guess that we will need to re-grow this
- //buffer a few times...so start off with a reasonable grow
- //value...
- if ( bufferLength > length )
- return;
-
- //Store the new bufferLength
- if ( length - bufferLength < 8 )
- bufferLength = length+8;
- else
- bufferLength = length+1;
-
- bool copy = buffer==NULL;
-
- //Instantiate the new buffer + 1 is needed for terminator '\0'
- if ( buffer == NULL )
- buffer = (TCHAR*)malloc(sizeof(TCHAR) *
(bufferLength+1));
- else
- buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) *
(bufferLength+1));
-
- if ( copy || force_copy){
- //Copy the text of term into buffer
- _tcsncpy(buffer,_term->text(),bufferLength);
- }
- }
+ }
+ } else {
+ if (termInfo->docFreq >= skipInterval) termInfo->skipOffset =
input->readVInt();
+ }
+
+ //Check if the enumeration is an index
+ if (isIndex)
+ //read index pointer
+ indexPointer += input->readVLong();
+
+ return true;
+}
+
+Term* SegmentTermEnum::term(bool pointer) {
+ if (pointer)
+ return _CL_POINTER(_term);
+ else
+ return _term;
+}
+
+void SegmentTermEnum::scanTo(const Term* term) {
+ //Func - Scan for Term without allocating new Terms
+ //Pre - term != NULL
+ //Post - The iterator term has been moved to the position where Term is
expected to be
+ // in the enumeration
+ while (term->compareTo(this->_term) > 0 && next()) {
+ }
+}
+
+void SegmentTermEnum::close() {
+ //Func - Closes the enumeration to further activity, freeing resources.
+ //Pre - true
+ //Post - The inputStream input has been closed
+
+ input->close();
+}
+
+int32_t SegmentTermEnum::docFreq() const {
+ //Func - Returns the document frequency of the current term in the set
+ //Pre - termInfo != NULL
+ // next() must have been called once
+ //Post - The document frequency of the current enumerated term has been
returned
+
+ return termInfo->docFreq;
+}
+
+void SegmentTermEnum::seek(const int64_t pointer, const int32_t p, Term* t,
TermInfo* ti) {
+ //Func - Repositions term and termInfo within the enumeration
+ //Pre - pointer >= 0
+ // p >= 0 and contains the new position within the enumeration
+ // t is a valid reference to a Term and is the new current term in
the enumeration
+ // ti is a valid reference to a TermInfo and is corresponding
TermInfo form the new
+ // current Term
+ //Post - term and terminfo have been repositioned within the enumeration
+
+ //Reset the IndexInput input to pointer
+ input->seek(pointer);
+ //Assign the new position
+ position = p;
+
+ //finalize the current term
+ if (_term == NULL || _LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1) {
+ _CLDECDELETE(_term);
+ //Get a pointer from t and increase the reference counter of t
+ _term = _CLNEW
+ Term; //cannot use reference, because TermInfosReader uses non
ref-counted array
+ }
+ _term->set(t, t->text());
+
+ //finalize prev
+ _CLDECDELETE(prev);
+
+ //Change the current termInfo so it matches the new current term
+ termInfo->set(ti);
+
+ //Have the buffer grown if needed
+ if (bufferLength <= _term->textLength())
+ growBuffer(_term->textLength(), true); // copy term text into buffer
+ else
+ _tcsncpy(buffer, _term->text(), bufferLength); //just copy the buffer
+}
+
+TermInfo* SegmentTermEnum::getTermInfo() const {
+ //Func - Returns a clone of the current termInfo
+ //Pre - termInfo != NULL
+ // next() must have been called once
+ //Post - A clone of the current termInfo has been returned
+
+ return _CLNEW TermInfo(*termInfo); //clone
+}
+
+void SegmentTermEnum::getTermInfo(TermInfo* ti) const {
+ //Func - Retrieves a clone of termInfo through the reference ti
+ //Pre - ti contains a valid reference to TermInfo
+ // termInfo != NULL
+ // next() must have been called once
+ //Post - ti contains a clone of termInfo
+
+ ti->set(termInfo);
+}
+
+int64_t SegmentTermEnum::freqPointer() const {
+ //Func - Returns the freqpointer of the current termInfo
+ //Pre - termInfo != NULL
+ // next() must have been called once
+ //Post - The freqpointer of the current termInfo has been returned
+
+ return termInfo->freqPointer;
+}
+
+int64_t SegmentTermEnum::proxPointer() const {
+ //Func - Returns the proxPointer of the current termInfo
+ //Pre - termInfo != NULL
+ // next() must have been called once
+ //Post - the proxPointer of the current termInfo has been returned
+
+ return termInfo->proxPointer;
+}
+
+SegmentTermEnum* SegmentTermEnum::clone() const {
+ //Func - Returns a clone of this instance
+ //Pre - true
+ //Post - An clone of this instance has been returned
+
+ return _CLNEW SegmentTermEnum(*this);
+}
+
+Term* SegmentTermEnum::readTerm(Term* reuse) {
+ //Func - Reads the next term in the enumeration
+ //Pre - true
+ //Post - The next Term in the enumeration has been read and returned
+
+ //Read the start position from the inputStream input
+ int32_t start = input->readVInt();
+ //Read the length of term in the inputStream input
+ int32_t length = input->readVInt();
+
+ //Calculated the total lenght of bytes that buffer must be to contain the
current
+ //chars in buffer and the new ones yet to be read
+ uint32_t totalLength = start + length;
+
+ if (static_cast<uint32_t>(bufferLength) < totalLength + 1)
+ growBuffer(totalLength, false); //dont copy the buffer over.
+
+ //Read a length number of characters into the buffer from position start
in the inputStream input
+ input->readChars(buffer, start, length);
+ //Null terminate the string
+ buffer[totalLength] = 0;
+
+ //Return a new Term
+ int32_t field = input->readVInt();
+ const TCHAR* fieldname = fieldInfos->fieldName(field);
+ if (reuse == NULL) reuse = _CLNEW Term;
+
+ reuse->set(fieldname, buffer, false);
+ return reuse;
+}
+
+void SegmentTermEnum::growBuffer(const uint32_t length, bool force_copy) {
+ //Func - Instantiate a buffer of length length+1
+ //Pre - length > 0
+ //Post - pre(buffer) has been deleted with its contents. A new buffer
+ // has been allocated of length length+1 and the text of term has
been copied
+ // to buffer
+ //todo: we could guess that we will need to re-grow this
+ //buffer a few times...so start off with a reasonable grow
+ //value...
+ if (bufferLength > length) return;
+
+ //Store the new bufferLength
+ if (length - bufferLength < 8)
+ bufferLength = length + 8;
+ else
+ bufferLength = length + 1;
+
+ bool copy = buffer == NULL;
+
+ //Instantiate the new buffer + 1 is needed for terminator '\0'
+ if (buffer == NULL)
+ buffer = (TCHAR*)malloc(sizeof(TCHAR) * (bufferLength + 1));
+ else
+ buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) * (bufferLength + 1));
+
+ if (copy || force_copy) {
+ //Copy the text of term into buffer
+ _tcsncpy(buffer, _term->text(), bufferLength);
+ }
+}
CL_NS_END
diff --git a/src/core/CLucene/index/TermInfosReader.cpp
b/src/core/CLucene/index/TermInfosReader.cpp
index 7996d4d4..6cf8b42f 100644
--- a/src/core/CLucene/index/TermInfosReader.cpp
+++ b/src/core/CLucene/index/TermInfosReader.cpp
@@ -24,207 +24,206 @@ CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)
-
- TermInfosReader::TermInfosReader(Directory* dir, const char* seg,
FieldInfos* fis, const int32_t readBufferSize):
- directory (dir),fieldInfos (fis), indexTerms(NULL), indexInfos(NULL),
indexPointers(NULL), indexDivisor(1)
- {
- //Func - Constructor.
- // Reads the TermInfos file (.tis) and eventually the Term Info Index
file (.tii)
- //Pre - dir is a reference to a valid Directory
- // Fis contains a valid reference to an FieldInfos instance
- // seg != NULL and contains the name of the segment
- //Post - An instance has been created and the index named seg has been read.
(Remember
- // a segment is nothing more then an independently readable index)
-
- CND_PRECONDITION(seg != NULL, "seg is NULL");
-
- //Initialize the name of the segment
- segment = seg;
-
- //Create a filname fo a Term Info File
- string tisFile = Misc::segmentname(segment,".tis");
- string tiiFile = Misc::segmentname(segment,".tii");
- bool success = false;
+TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos*
fis,
+ const int32_t readBufferSize)
+ : directory(dir),
+ fieldInfos(fis),
+ indexTerms(NULL),
+ indexInfos(NULL),
+ indexPointers(NULL),
+ indexDivisor(1) {
+ //Func - Constructor.
+ // Reads the TermInfos file (.tis) and eventually the Term Info
Index file (.tii)
+ //Pre - dir is a reference to a valid Directory
+ // Fis contains a valid reference to an FieldInfos instance
+ // seg != NULL and contains the name of the segment
+ //Post - An instance has been created and the index named seg has been
read. (Remember
+ // a segment is nothing more then an independently readable index)
+
+ CND_PRECONDITION(seg != NULL, "seg is NULL");
+
+ //Initialize the name of the segment
+ segment = seg;
+
+ //Create a filname fo a Term Info File
+ string tisFile = Misc::segmentname(segment, ".tis");
+ string tiiFile = Misc::segmentname(segment, ".tii");
+ bool success = false;
origEnum = indexEnum = NULL;
_size = indexTermsLength = totalIndexInterval = 0;
- indexIsRead = false;
-
- try {
- //Create an SegmentTermEnum for storing all the terms read of the segment
-
- // tii
- auto tiiStream = directory->openInput( tiiFile.c_str(), readBufferSize );
- indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true, -1);
- CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index
enumerator");
-
- // tis
- auto tisStream = directory->openInput( tisFile.c_str(), readBufferSize );
- origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false,
indexEnum->getFormat());
- origEnum->initByTii(indexEnum);
- CND_CONDITION(origEnum != NULL, "No memory could be allocated for index
enumerator");
- _size = origEnum->size;
- totalIndexInterval = origEnum->indexInterval;
-
- //call ensureIndexIsRead to load data to memory right now
- ensureIndexIsRead();
-
- success = true;
- } _CLFINALLY({
- // With lock-less commits, it's entirely possible (and
- // fine) to hit a FileNotFound exception above. In
- // this case, we want to explicitly close any subset
- // of things that were opened so that we don't have to
- // wait for a GC to do so.
- if (!success) {
- close();
- }
- });
-
- }
-
- TermInfosReader::~TermInfosReader(){
- //Func - Destructor
- //Pre - true
- //Post - The instance has been destroyed
-
- //Close the TermInfosReader to be absolutly sure that enumerator has
been closed
- //and the arrays indexTerms, indexPointers and indexInfos and their
elements
- //have been destroyed
- close();
- }
- int32_t TermInfosReader::getSkipInterval() const {
+ indexIsRead = false;
+
+ try {
+ //Create an SegmentTermEnum for storing all the terms read of the
segment
+
+ // tii
+ auto tiiStream = directory->openInput(tiiFile.c_str(), readBufferSize);
+ indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true);
+ indexEnum->init(-1);
+ CND_CONDITION(indexEnum != NULL, "No memory could be allocated for
index enumerator");
+
+ // tis
+ auto tisStream = directory->openInput(tisFile.c_str(), readBufferSize);
+ origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false);
+ origEnum->init(indexEnum->getFormat());
+ origEnum->initByTii(indexEnum);
+ CND_CONDITION(origEnum != NULL, "No memory could be allocated for
index enumerator");
+ _size = origEnum->size;
+ totalIndexInterval = origEnum->indexInterval;
+
+ //call ensureIndexIsRead to load data to memory right now
+ ensureIndexIsRead();
+
+ success = true;
+ }
+ _CLFINALLY({
+ // With lock-less commits, it's entirely possible (and
+ // fine) to hit a FileNotFound exception above. In
+ // this case, we want to explicitly close any subset
+ // of things that were opened so that we don't have to
+ // wait for a GC to do so.
+ if (!success) {
+ close();
+ }
+ });
+}
+
+TermInfosReader::~TermInfosReader() {
+ //Func - Destructor
+ //Pre - true
+ //Post - The instance has been destroyed
+
+ //Close the TermInfosReader to be absolutly sure that enumerator has been
closed
+ //and the arrays indexTerms, indexPointers and indexInfos and their
elements
+ //have been destroyed
+ close();
+}
+int32_t TermInfosReader::getSkipInterval() const {
return origEnum->skipInterval;
- }
+}
- int32_t TermInfosReader::getMaxSkipLevels() const {
+int32_t TermInfosReader::getMaxSkipLevels() const {
return origEnum->maxSkipLevels;
- }
-
- void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
- if (indexDivisor < 1)
- _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must be > 0");
+}
- if (indexTerms != NULL)
- _CLTHROWA(CL_ERR_IllegalArgument, "index terms are already
loaded");
+void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
+ if (indexDivisor < 1) _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must
be > 0");
- this->indexDivisor = _indexDivisor;
- totalIndexInterval = origEnum->indexInterval * _indexDivisor;
- }
+ if (indexTerms != NULL) _CLTHROWA(CL_ERR_IllegalArgument, "index terms are
already loaded");
- int32_t TermInfosReader::getIndexDivisor() const { return indexDivisor; }
- void TermInfosReader::close() {
+ this->indexDivisor = _indexDivisor;
+ totalIndexInterval = origEnum->indexInterval * _indexDivisor;
+}
- //Check if indexTerms and indexInfos exist
- if (indexTerms && indexInfos){
- //Iterate through arrays indexTerms and indexPointer to
- //destroy their elements
+int32_t TermInfosReader::getIndexDivisor() const {
+ return indexDivisor;
+}
+void TermInfosReader::close() {
+ //Check if indexTerms and indexInfos exist
+ if (indexTerms && indexInfos) {
+ //Iterate through arrays indexTerms and indexPointer to
+ //destroy their elements
#ifdef _DEBUG
- for ( int32_t i=0; i<indexTermsLength;++i ){
+ for (int32_t i = 0; i < indexTermsLength; ++i) {
indexTerms[i].__cl_refcount--;
- }
+ }
#endif
- }
- //Delete the arrays
- if (indexTerms){
- delete [] indexTerms;
- indexTerms = NULL;
- }
- if (indexInfos){
- _CLDELETE_ARRAY(indexInfos);
- indexInfos = NULL;
- }
-
- //Delete the arrays
- if (indexPointers) {
+ }
+ //Delete the arrays
+ if (indexTerms) {
+ delete[] indexTerms;
+ indexTerms = NULL;
+ }
+ if (indexInfos) {
+ _CLDELETE_ARRAY(indexInfos);
+ indexInfos = NULL;
+ }
+
+ //Delete the arrays
+ if (indexPointers) {
_CLDELETE_ARRAY(indexPointers);
indexPointers = NULL;
- }
+ }
- if (origEnum != NULL){
+ if (origEnum != NULL) {
origEnum->close();
- //Get a pointer to IndexInput used by the enumeration but
- //instantiated in the constructor by directory.open( tisFile )
- IndexInput *is = origEnum->input;
+ //Get a pointer to IndexInput used by the enumeration but
+ //instantiated in the constructor by directory.open( tisFile )
+ IndexInput* is = origEnum->input;
//Delete the enumuration enumerator
_CLDELETE(origEnum);
//Delete the IndexInput
_CLDELETE(is);
- }
+ }
- if (indexEnum != NULL){
+ if (indexEnum != NULL) {
indexEnum->close();
- //Get a pointer to IndexInput used by the enumeration but
- //instantiated in the constructor by directory.open( tiiFile )
- IndexInput *is = indexEnum->input;
+ //Get a pointer to IndexInput used by the enumeration but
+ //instantiated in the constructor by directory.open( tiiFile )
+ IndexInput* is = indexEnum->input;
//Delete the enumuration enumerator
_CLDELETE(indexEnum);
- indexEnum = NULL;
+ indexEnum = NULL;
//Delete the IndexInput
_CLDELETE(is);
- }
- enumerators.setNull();
- }
-
- int64_t TermInfosReader::size() const{
- //Func - Return the size of the enumeration of TermInfos
- //Pre - true
- //Post - size has been returened
+ }
+ enumerators.setNull();
+}
- return _size;
- }
+int64_t TermInfosReader::size() const {
+ //Func - Return the size of the enumeration of TermInfos
+ //Pre - true
+ //Post - size has been returened
+ return _size;
+}
- Term* TermInfosReader::get(const int32_t position) {
- //Func - Returns the nth term in the set
- //Pre - position > = 0
- //Post - The n-th term in the set has been returned
+Term* TermInfosReader::get(const int32_t position) {
+ //Func - Returns the nth term in the set
+ //Pre - position > = 0
+ //Post - The n-th term in the set has been returned
- //Check if the size is 0 because then there are no terms
- if (_size == 0)
- return NULL;
+ //Check if the size is 0 because then there are no terms
+ if (_size == 0) return NULL;
- SegmentTermEnum* enumerator = getEnum();
+ SegmentTermEnum* enumerator = getEnum();
- if (
- enumerator != NULL //an enumeration exists
- && enumerator->term(false) != NULL // term is at or past current
- && position >= enumerator->position
- && position < (enumerator->position + totalIndexInterval)
- )
- {
- return scanEnum(position); // can avoid
seek
- }
+ if (enumerator != NULL //an enumeration exists
+ && enumerator->term(false) != NULL // term is at or past current
+ && position >= enumerator->position &&
+ position < (enumerator->position + totalIndexInterval)) {
+ return scanEnum(position); // can avoid seek
+ }
//random-access: must seek
seekEnum(position / totalIndexInterval);
- //Get the Term at position
+ //Get the Term at position
return scanEnum(position);
- }
+}
- SegmentTermEnum* TermInfosReader::getEnum(){
+SegmentTermEnum* TermInfosReader::getEnum() {
SegmentTermEnum* termEnum = enumerators.get();
- if (termEnum == NULL){
- termEnum = terms();
- enumerators.set(termEnum);
+ if (termEnum == NULL) {
+ termEnum = terms();
+ enumerators.set(termEnum);
}
return termEnum;
- }
+}
- TermInfo* TermInfosReader::get(const Term* term){
- //Func - Returns a TermInfo for a term
- //Pre - term holds a valid reference to term
- //Post - if term can be found its TermInfo has been returned otherwise NULL
+TermInfo* TermInfosReader::get(const Term* term) {
+ //Func - Returns a TermInfo for a term
+ //Pre - term holds a valid reference to term
+ //Post - if term can be found its TermInfo has been returned otherwise NULL
//If the size of the enumeration is 0 then no Terms have been read
- if (_size == 0)
- return NULL;
+ if (_size == 0) return NULL;
ensureIndexIsRead();
@@ -233,250 +232,238 @@ CL_NS_DEF(index)
// optimize sequential access: first try scanning cached enumerator w/o seeking
if (
- //the current term of the enumeration enumerator is not at the end AND
- enumerator->term(false) != NULL &&
- (
- //there exists a previous current called prev and term is positioned after this prev OR
- ( enumerator->prev != NULL && term->compareTo(enumerator->prev) >
0) ||
- //term is positioned at the same position as the current of enumerator or at a higher position
- term->compareTo(enumerator->term(false)) >= 0 )
- )
- {
-
- //Calculate the offset for the position
- int32_t _enumOffset =
(int32_t)(enumerator->position/totalIndexInterval)+1;
-
- // but before end of block
- if (
- //the length of indexTerms (the number of terms in enumerator) equals
- //_enum_offset OR
- indexTermsLength == _enumOffset ||
- //term is positioned in front of term found at _enumOffset in indexTerms
- term->compareTo(&indexTerms[_enumOffset]) < 0){
-
- //no need to seek, retrieve the TermInfo for term
- return scanEnum(term);
+ //the current term of the enumeration enumerator is not at the end AND
+ enumerator->term(false) != NULL &&
+ (
+ //there exists a previous current called prev and term is positioned after this prev OR
+ (enumerator->prev != NULL &&
term->compareTo(enumerator->prev) > 0) ||
+ //term is positioned at the same position as the current of enumerator or at a higher position
+ term->compareTo(enumerator->term(false)) >= 0)) {
+ //Calculate the offset for the position
+ int32_t _enumOffset = (int32_t)(enumerator->position /
totalIndexInterval) + 1;
+
+ // but before end of block
+ if (
+ //the length of indexTerms (the number of terms in enumerator) equals
+ //_enum_offset OR
+ indexTermsLength == _enumOffset ||
+ //term is positioned in front of term found at _enumOffset in indexTerms
+ term->compareTo(&indexTerms[_enumOffset]) < 0) {
+ //no need to seek, retrieve the TermInfo for term
+ return scanEnum(term);
}
}
//Reposition current term in the enumeration
seekEnum(getIndexOffset(term));
- //Return the TermInfo for term
+ //Return the TermInfo for term
return scanEnum(term);
- }
-
+}
- int64_t TermInfosReader::getPosition(const Term* term) {
- //Func - Returns the position of a Term in the set
- //Pre - term holds a valid reference to a Term
- // enumerator != NULL
- //Post - If term was found then its position is returned otherwise -1
+int64_t TermInfosReader::getPosition(const Term* term) {
+ //Func - Returns the position of a Term in the set
+ //Pre - term holds a valid reference to a Term
+ // enumerator != NULL
+ //Post - If term was found then its position is returned otherwise -1
- //if the enumeration is empty then return -1
- if (_size == 0)
- return -1;
+ //if the enumeration is empty then return -1
+ if (_size == 0) return -1;
- ensureIndexIsRead();
-
- //Retrieve the indexOffset for term
- int32_t indexOffset = getIndexOffset(term);
- seekEnum(indexOffset);
+ ensureIndexIsRead();
- SegmentTermEnum* enumerator = getEnum();
+ //Retrieve the indexOffset for term
+ int32_t indexOffset = getIndexOffset(term);
+ seekEnum(indexOffset);
- while(term->compareTo(enumerator->term(false)) > 0 &&
enumerator->next()) {}
+ SegmentTermEnum* enumerator = getEnum();
- if ( term->equals(enumerator->term(false)) ){
- return enumerator->position;
- }else
- return -1;
- }
+ while (term->compareTo(enumerator->term(false)) > 0 && enumerator->next())
{
+ }
- SegmentTermEnum* TermInfosReader::terms(const Term* term) {
- //Func - Returns an enumeration of terms starting at or after the named term.
- // If term is null then enumerator is set to the beginning
- //Pre - term holds a valid reference to a Term
- // enumerator != NULL
- //Post - An enumeration of terms starting at or after the named term has been returned
+ if (term->equals(enumerator->term(false))) {
+ return enumerator->position;
+ } else
+ return -1;
+}
+
+SegmentTermEnum* TermInfosReader::terms(const Term* term) {
+ //Func - Returns an enumeration of terms starting at or after the named term.
+ // If term is null then enumerator is set to the beginning
+ //Pre - term holds a valid reference to a Term
+ // enumerator != NULL
+ //Post - An enumeration of terms starting at or after the named term has been returned
+
+ SegmentTermEnum* enumerator = NULL;
+ if (term != NULL) {
+ //Seek enumerator to term; delete the new TermInfo that's returned.
+ TermInfo* ti = get(term);
+ _CLLDELETE(ti);
+ enumerator = getEnum();
+ } else
+ enumerator = origEnum;
+
+ //Clone the entire enumeration
+ SegmentTermEnum* cln = enumerator->clone();
+
+ //Check if cln points to a valid instance
+ CND_CONDITION(cln != NULL, "cln is NULL");
+
+ return cln;
+}
+
+void TermInfosReader::ensureIndexIsRead() {
+ //Func - Reads the term info index file or .tti file.
+ // This file contains every IndexInterval-th entry from the .tis file,
+ // along with its location in the "tis" file. This is designed to be read entirely
+ // into memory and used to provide random access to the "tis" file.
+ //Pre - indexTerms = NULL
+ // indexInfos = NULL
+ // indexPointers = NULL
+ //Post - The term info index file has been read into memory
- SegmentTermEnum* enumerator = NULL;
- if ( term != NULL ){
- //Seek enumerator to term; delete the new TermInfo that's returned.
- TermInfo* ti = get(term);
- _CLLDELETE(ti);
- enumerator = getEnum();
- }else
- enumerator = origEnum;
+ SCOPED_LOCK_MUTEX(THIS_LOCK)
- //Clone the entire enumeration
- SegmentTermEnum* cln = enumerator->clone();
+ if (indexIsRead) return;
+
+ //https://jira.qianxin-inc.cn/browse/XHBUG-2921
+ //https://jira.qianxin-inc.cn/browse/XHBUG-3053
+ if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
+
+ try {
+ indexTermsLength = (size_t)indexEnum->size;
+
+ //Instantiate an block of Term's,so that each one doesn't have to be new'd
+ indexTerms = new Term[indexTermsLength];
+ CND_CONDITION(
+ indexTerms != NULL,
+ "No memory could be allocated for indexTerms"); //Check if is
indexTerms is a valid array
+
+ //Instantiate an big block of TermInfo's, so that each one doesn't have to be new'd
+ indexInfos = _CL_NEWARRAY(TermInfo, indexTermsLength);
+ CND_CONDITION(
+ indexInfos != NULL,
+ "No memory could be allocated for indexInfos"); //Check if is
indexInfos is a valid array
+
+ //Instantiate an array indexPointers that contains pointers to the term info index file
+ indexPointers = _CL_NEWARRAY(int64_t, indexTermsLength);
+ CND_CONDITION(
+ indexPointers != NULL,
+ "No memory could be allocated for indexPointers"); //Check if
is indexPointers is a valid array
+
+ //Iterate through the terms of indexEnum
+ for (int32_t i = 0; indexEnum->next(); ++i) {
+ indexTerms[i].set(indexEnum->term(false),
indexEnum->term(false)->text());
+ indexEnum->getTermInfo(&indexInfos[i]);
+ indexPointers[i] = indexEnum->indexPointer;
+
+ for (int32_t j = 1; j < indexDivisor; j++)
+ if (!indexEnum->next()) break;
+ }
+ indexIsRead = true;
+ }
+ _CLFINALLY(indexEnum->close();
+ //Close and delete the IndexInput is. The close is done by the
destructor.
+ _CLDELETE(indexEnum->input); _CLDELETE(indexEnum); indexEnum =
NULL;);
+}
+
+int32_t TermInfosReader::getIndexOffset(const Term* term) {
+ //Func - Returns the offset of the greatest index entry which is less than or equal to term.
+ //Pre - term holds a reference to a valid term
+ // indexTerms != NULL
+ //Post - The new offset has been returned
+
+ //Check if is indexTerms is a valid array
+ CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+
+ int32_t lo = 0;
+ int32_t hi = indexTermsLength - 1;
+ int32_t mid;
+ int32_t delta;
+
+ while (hi >= lo) {
+ //Start in the middle betwee hi and lo
+ mid = (lo + hi) >> 1;
+
+ //Check if is indexTerms[mid] is a valid instance of Term
+ CND_PRECONDITION(&indexTerms[mid] != NULL, "indexTerms[mid] is NULL");
+ CND_PRECONDITION(mid < indexTermsLength, "mid >= indexTermsLength");
+
+ //Determine if term is before mid or after mid
+ delta = term->compareTo(&indexTerms[mid]);
+ if (delta < 0) {
+ //Calculate the new hi
+ hi = mid - 1;
+ } else if (delta > 0) {
+ //Calculate the new lo
+ lo = mid + 1;
+ } else {
+ //term has been found so return its position
+ return mid;
+ }
+ }
+ // the new starting offset
+ return hi;
+}
+
+void TermInfosReader::seekEnum(const int32_t indexOffset) {
+ //Func - Reposition the current Term and TermInfo to indexOffset
+ //Pre - indexOffset >= 0
+ // indexTerms != NULL
+ // indexInfos != NULL
+ // indexPointers != NULL
+ //Post - The current Term and Terminfo have been repositioned to indexOffset
+
+ CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative
number");
+ CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+ CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
+ CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
- //Check if cln points to a valid instance
- CND_CONDITION(cln != NULL,"cln is NULL");
+ SegmentTermEnum* enumerator = getEnum();
+ enumerator->seek(indexPointers[indexOffset], (indexOffset *
totalIndexInterval) - 1,
+ &indexTerms[indexOffset], &indexInfos[indexOffset]);
+}
+
+TermInfo* TermInfosReader::scanEnum(const Term* term) {
+ //Func - Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
+ // The search is started from the current term.
+ //Pre - term contains a valid reference to a Term
+ // enumerator != NULL
+ //Post - if term has been found the corresponding TermInfo has been returned otherwise NULL
+ // has been returned
- return cln;
- }
+ SegmentTermEnum* enumerator = getEnum();
+ enumerator->scanTo(term);
+
+ //Check if the at the position the Term term can be found
+ if (enumerator->term(false) != NULL &&
term->equals(enumerator->term(false))) {
+ //Return the TermInfo instance about term
+ return enumerator->getTermInfo();
+ } else {
+ //term was not found so no TermInfo can be returned
+ return NULL;
+ }
+}
+Term* TermInfosReader::scanEnum(const int32_t position) {
+ //Func - Scans the enumeration to the requested position and returns the
+ // Term located at that position
+ //Pre - position > = 0
+ // enumerator != NULL
+ //Post - The Term at the requested position has been returned
- void TermInfosReader::ensureIndexIsRead() {
- //Func - Reads the term info index file or .tti file.
- // This file contains every IndexInterval-th entry from the .tis file,
- // along with its location in the "tis" file. This is designed to be read entirely
- // into memory and used to provide random access to the "tis" file.
- //Pre - indexTerms = NULL
- // indexInfos = NULL
- // indexPointers = NULL
- //Post - The term info index file has been read into memory
+ SegmentTermEnum* enumerator = getEnum();
- SCOPED_LOCK_MUTEX(THIS_LOCK)
+ //As long the position of the enumeration enumerator is smaller than the requested one
+ while (enumerator->position < position) {
+ //Move the current of enumerator to the next
+ if (!enumerator->next()) {
+ //If there is no next it means that the requested position was to big
+ return NULL;
+ }
+ }
- if (indexIsRead)
- return;
-
- //https://jira.qianxin-inc.cn/browse/XHBUG-2921
- //https://jira.qianxin-inc.cn/browse/XHBUG-3053
- if (indexEnum == NULL)
- _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
-
- try {
- indexTermsLength = (size_t)indexEnum->size;
-
- //Instantiate an block of Term's,so that each one doesn't have to be new'd
- indexTerms = new Term[indexTermsLength];
- CND_CONDITION(indexTerms != NULL,"No memory could be allocated for
indexTerms");//Check if is indexTerms is a valid array
-
- //Instantiate an big block of TermInfo's, so that each one doesn't have to be new'd
- indexInfos = _CL_NEWARRAY(TermInfo,indexTermsLength);
- CND_CONDITION(indexInfos != NULL,"No memory could be allocated for
indexInfos"); //Check if is indexInfos is a valid array
-
- //Instantiate an array indexPointers that contains pointers to the term info index file
- indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);
- CND_CONDITION(indexPointers != NULL,"No memory could be allocated
for indexPointers");//Check if is indexPointers is a valid array
-
- //Iterate through the terms of indexEnum
- for (int32_t i = 0; indexEnum->next(); ++i){
-
indexTerms[i].set(indexEnum->term(false),indexEnum->term(false)->text());
- indexEnum->getTermInfo(&indexInfos[i]);
- indexPointers[i] = indexEnum->indexPointer;
-
- for (int32_t j = 1; j < indexDivisor; j++)
- if (!indexEnum->next())
- break;
- }
- indexIsRead = true;
- }_CLFINALLY(
- indexEnum->close();
- //Close and delete the IndexInput is. The close is done by the destructor.
- _CLDELETE( indexEnum->input );
- _CLDELETE( indexEnum );
- indexEnum = NULL;
- );
- }
-
-
- int32_t TermInfosReader::getIndexOffset(const Term* term){
- //Func - Returns the offset of the greatest index entry which is less than or equal to term.
- //Pre - term holds a reference to a valid term
- // indexTerms != NULL
- //Post - The new offset has been returned
-
- //Check if is indexTerms is a valid array
- CND_PRECONDITION(indexTerms != NULL,"indexTerms is NULL");
-
- int32_t lo = 0;
- int32_t hi = indexTermsLength - 1;
- int32_t mid;
- int32_t delta;
-
- while (hi >= lo) {
- //Start in the middle betwee hi and lo
- mid = (lo + hi) >> 1;
-
- //Check if is indexTerms[mid] is a valid instance of Term
- CND_PRECONDITION(&indexTerms[mid] != NULL,"indexTerms[mid] is NULL");
- CND_PRECONDITION(mid < indexTermsLength,"mid >= indexTermsLength");
-
- //Determine if term is before mid or after mid
- delta = term->compareTo(&indexTerms[mid]);
- if (delta < 0){
- //Calculate the new hi
- hi = mid - 1;
- }else if (delta > 0){
- //Calculate the new lo
- lo = mid + 1;
- }else{
- //term has been found so return its position
- return mid;
- }
- }
- // the new starting offset
- return hi;
- }
-
- void TermInfosReader::seekEnum(const int32_t indexOffset) {
- //Func - Reposition the current Term and TermInfo to indexOffset
- //Pre - indexOffset >= 0
- // indexTerms != NULL
- // indexInfos != NULL
- // indexPointers != NULL
- //Post - The current Term and Terminfo have been repositioned to indexOffset
-
- CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative
number");
- CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
- CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
- CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
-
- SegmentTermEnum* enumerator = getEnum();
- enumerator->seek(
- indexPointers[indexOffset],
- (indexOffset * totalIndexInterval) - 1,
- &indexTerms[indexOffset],
- &indexInfos[indexOffset]
- );
- }
-
-
- TermInfo* TermInfosReader::scanEnum(const Term* term) {
- //Func - Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
- // The search is started from the current term.
- //Pre - term contains a valid reference to a Term
- // enumerator != NULL
- //Post - if term has been found the corresponding TermInfo has been returned otherwise NULL
- // has been returned
-
- SegmentTermEnum* enumerator = getEnum();
- enumerator->scanTo(term);
-
- //Check if the at the position the Term term can be found
- if (enumerator->term(false) != NULL &&
term->equals(enumerator->term(false)) ){
- //Return the TermInfo instance about term
- return enumerator->getTermInfo();
- }else{
- //term was not found so no TermInfo can be returned
- return NULL;
- }
- }
-
- Term* TermInfosReader::scanEnum(const int32_t position) {
- //Func - Scans the enumeration to the requested position and returns the
- // Term located at that position
- //Pre - position > = 0
- // enumerator != NULL
- //Post - The Term at the requested position has been returned
-
- SegmentTermEnum* enumerator = getEnum();
-
- //As long the position of the enumeration enumerator is smaller than the requested one
- while(enumerator->position < position){
- //Move the current of enumerator to the next
- if (!enumerator->next()){
- //If there is no next it means that the requested position was to big
- return NULL;
- }
- }
-
- //Return the Term a the requested position
- return enumerator->term();
- }
+ //Return the Term a the requested position
+ return enumerator->term();
+}
CL_NS_END
diff --git a/src/core/CLucene/index/_SegmentTermEnum.h b/src/core/CLucene/index/_SegmentTermEnum.h
index b5fa419d..3dd2c8c5 100644
--- a/src/core/CLucene/index/_SegmentTermEnum.h
+++ b/src/core/CLucene/index/_SegmentTermEnum.h
@@ -55,7 +55,8 @@ protected:
public:
///Constructor
- SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const
bool isi, int32_t in_format = -1);
+ SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const
bool isi);
+ void init(int32_t in_format = -1);
///Destructor
~SegmentTermEnum();
diff --git a/src/core/CLucene/store/IndexOutput.cpp b/src/core/CLucene/store/IndexOutput.cpp
index 05e7695f..77c37400 100644
--- a/src/core/CLucene/store/IndexOutput.cpp
+++ b/src/core/CLucene/store/IndexOutput.cpp
@@ -35,12 +35,13 @@ CL_NS_DEF(store)
close();
}
- void BufferedIndexOutput::close(){
- flush();
- _CLDELETE_ARRAY( buffer );
-
- bufferStart = 0;
- bufferPosition = 0;
+ void BufferedIndexOutput::close() {
+ // flush may throw error here, if we do not delete buffer for all circumstances,
+ // we may close again in destructor above, that would cause pure virtual function call for flushBuffer
+ try {
+ flush();
+ }
+ _CLFINALLY(_CLDELETE_ARRAY(buffer); bufferStart = 0; bufferPosition = 0;)
}
void BufferedIndexOutput::writeByte(const uint8_t b) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]