This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 0edfd44 [Fix](clucene) fix clucene Memory leak (#43)
0edfd44 is described below
commit 0edfd44af1aa7402a4316e630706e896310f962e
Author: yangsiyu1 <[email protected]>
AuthorDate: Thu Mar 23 19:06:36 2023 +0800
[Fix](clucene) fix clucene Memory leak (#43)
Co-authored-by: yangsiyu <[email protected]>
---
.../CLucene/index/DocumentsWriterThreadState.cpp | 21 ++++++++-------------
src/core/CLucene/index/SDocumentWriter.cpp | 3 +++
src/core/CLucene/index/SDocumentWriter.h | 11 ++++++++---
src/core/CLucene/index/_DocumentsWriter.h | 2 --
4 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/src/core/CLucene/index/DocumentsWriterThreadState.cpp b/src/core/CLucene/index/DocumentsWriterThreadState.cpp
index cd8a7f6..21e14fe 100644
--- a/src/core/CLucene/index/DocumentsWriterThreadState.cpp
+++ b/src/core/CLucene/index/DocumentsWriterThreadState.cpp
@@ -79,8 +79,6 @@ DocumentsWriter::ThreadState::ThreadState(DocumentsWriter *__parent) : postingsF
this->pos = NULL;
this->freq = NULL;
this->doFlushAfter = false;
-
- analyzer_chs = _CLNEW lucene::analysis::SimpleAnalyzer<TCHAR>();
}
DocumentsWriter::ThreadState::~ThreadState() {
@@ -91,10 +89,12 @@ DocumentsWriter::ThreadState::~ThreadState() {
_CLDELETE(tvfLocal);
_CLDELETE(fdtLocal);
+ if (maxTermPrefix != nullptr) {
+ _CLDELETE_ARRAY(maxTermPrefix);
+ }
+
for (size_t i = 0; i < allFieldDataArray.length; i++)
_CLDELETE(allFieldDataArray.values[i]);
-
- _CLDELETE(analyzer_chs);
}
void DocumentsWriter::ThreadState::resetPostings() {
@@ -187,7 +187,6 @@ void DocumentsWriter::ThreadState::init(Document *doc, int32_t docID) {
numStoredFields = 0;
numFieldData = 0;
numVectorFields = 0;
- maxTermPrefix = NULL;
assert(0 == fdtLocal->length());
assert(0 == fdtLocal->getFilePointer());
@@ -798,12 +797,7 @@ void DocumentsWriter::ThreadState::FieldData::processField(Analyzer *analyzer) {
Field *field = docFieldsFinal[j];
if (field->isIndexed()) {
- //Oney: get the CHN if needed
- if (field->isTokenizedCHS()) {
- invertField(field, threadState->analyzer_chs, maxFieldLength);
- } else {
- invertField(field, analyzer, maxFieldLength);
- }
+ invertField(field, analyzer, maxFieldLength);
}
if (field->isStored()) {
@@ -1117,10 +1111,11 @@ void DocumentsWriter::ThreadState::FieldData::addPosition(Token *token) {
// to a prefix, throwing an exception, etc).
if (threadState->maxTermPrefix == NULL) {
threadState->maxTermPrefix = _CL_NEWARRAY(TCHAR, 31);
- _tcsncpy(threadState->maxTermPrefix, tokenText, 30);
- threadState->maxTermPrefix[30] = 0;
}
+ _tcsncpy(threadState->maxTermPrefix, tokenText, 30);
+ threadState->maxTermPrefix[30] = 0;
+
// Still increment position:
position++;
return;
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index e90f92f..3b6e66f 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -107,6 +107,9 @@ SDocumentsWriter<T>::~SDocumentsWriter() {
if (threadState != nullptr) {
_CLDELETE(threadState);
}
+ if (_files != nullptr) {
+ _CLDELETE(_files);
+ }
// Make sure unused posting slots aren't attempted delete on
if (this->postingsFreeListDW.values) {
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index d89a42a..ebb14b3 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -734,9 +734,11 @@ public:
void setInfoStream(std::ostream *infoStream) override {}
void setRAMBufferSizeMB(float_t mb) override {}
void close() override {}
- const std::vector<std::string> &files() override {
- auto EMPTY = _CLNEW std::vector<string>;
- return *EMPTY;
+ const std::vector<std::string>& files() override {
+ if (_files != nullptr)
+ return *_files;
+ _files = _CLNEW std::vector<string>;
+ return *_files;
}
void setMaxBufferedDeleteTerms(int32_t _maxBufferedDeleteTerms) override {_CLTHROW_NOT_IMPLEMENT}
int32_t getMaxBufferedDeleteTerms() override {_CLTHROW_NOT_IMPLEMENT}
@@ -753,6 +755,9 @@ public:
int64_t getRAMUsed() override {_CLTHROW_NOT_IMPLEMENT}
const std::vector<int32_t> *getBufferedDeleteDocIDs() override {_CLTHROW_NOT_IMPLEMENT}
+private:
+ std::vector<std::string>* _files = nullptr;
+
public:
ThreadState *threadState;
CL_NS(util)::ObjectArray<BufferedNorms> norms;// Holds norms until we flush
diff --git a/src/core/CLucene/index/_DocumentsWriter.h b/src/core/CLucene/index/_DocumentsWriter.h
index 9d359c2..f50d8e8 100644
--- a/src/core/CLucene/index/_DocumentsWriter.h
+++ b/src/core/CLucene/index/_DocumentsWriter.h
@@ -601,8 +601,6 @@ private:
DocumentsWriter* _parent;
- CL_NS(analysis)::Analyzer* analyzer_chs;
-
ThreadState(DocumentsWriter* _parent);
virtual ~ThreadState();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]