This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 0edfd44  [Fix](clucene) fix clucene Memory leak (#43)
0edfd44 is described below

commit 0edfd44af1aa7402a4316e630706e896310f962e
Author: yangsiyu1 <[email protected]>
AuthorDate: Thu Mar 23 19:06:36 2023 +0800

    [Fix](clucene) fix clucene Memory leak (#43)
    
    Co-authored-by: yangsiyu <[email protected]>
---
 .../CLucene/index/DocumentsWriterThreadState.cpp    | 21 ++++++++-------------
 src/core/CLucene/index/SDocumentWriter.cpp          |  3 +++
 src/core/CLucene/index/SDocumentWriter.h            | 11 ++++++++---
 src/core/CLucene/index/_DocumentsWriter.h           |  2 --
 4 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/core/CLucene/index/DocumentsWriterThreadState.cpp b/src/core/CLucene/index/DocumentsWriterThreadState.cpp
index cd8a7f6..21e14fe 100644
--- a/src/core/CLucene/index/DocumentsWriterThreadState.cpp
+++ b/src/core/CLucene/index/DocumentsWriterThreadState.cpp
@@ -79,8 +79,6 @@ DocumentsWriter::ThreadState::ThreadState(DocumentsWriter *__parent) : postingsF
     this->pos = NULL;
     this->freq = NULL;
     this->doFlushAfter = false;
-
-    analyzer_chs = _CLNEW lucene::analysis::SimpleAnalyzer<TCHAR>();
 }
 
 DocumentsWriter::ThreadState::~ThreadState() {
@@ -91,10 +89,12 @@ DocumentsWriter::ThreadState::~ThreadState() {
     _CLDELETE(tvfLocal);
     _CLDELETE(fdtLocal);
 
+    if (maxTermPrefix != nullptr) {
+        _CLDELETE_ARRAY(maxTermPrefix);
+    }
+
     for (size_t i = 0; i < allFieldDataArray.length; i++)
         _CLDELETE(allFieldDataArray.values[i]);
-
-    _CLDELETE(analyzer_chs);
 }
 
 void DocumentsWriter::ThreadState::resetPostings() {
@@ -187,7 +187,6 @@ void DocumentsWriter::ThreadState::init(Document *doc, int32_t docID) {
     numStoredFields = 0;
     numFieldData = 0;
     numVectorFields = 0;
-    maxTermPrefix = NULL;
 
     assert(0 == fdtLocal->length());
     assert(0 == fdtLocal->getFilePointer());
@@ -798,12 +797,7 @@ void DocumentsWriter::ThreadState::FieldData::processField(Analyzer *analyzer) {
             Field *field = docFieldsFinal[j];
 
             if (field->isIndexed()) {
-                //Oney: get the CHN if needed
-                if (field->isTokenizedCHS()) {
-                    invertField(field, threadState->analyzer_chs, maxFieldLength);
-                } else {
-                    invertField(field, analyzer, maxFieldLength);
-                }
+                invertField(field, analyzer, maxFieldLength);
             }
 
             if (field->isStored()) {
@@ -1117,10 +1111,11 @@ void DocumentsWriter::ThreadState::FieldData::addPosition(Token *token) {
                     // to a prefix, throwing an exception, etc).
                     if (threadState->maxTermPrefix == NULL) {
                         threadState->maxTermPrefix = _CL_NEWARRAY(TCHAR, 31);
-                        _tcsncpy(threadState->maxTermPrefix, tokenText, 30);
-                        threadState->maxTermPrefix[30] = 0;
                     }
 
+                    _tcsncpy(threadState->maxTermPrefix, tokenText, 30);
+                    threadState->maxTermPrefix[30] = 0;
+
                     // Still increment position:
                     position++;
                     return;
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index e90f92f..3b6e66f 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -107,6 +107,9 @@ SDocumentsWriter<T>::~SDocumentsWriter() {
     if (threadState != nullptr) {
         _CLDELETE(threadState);
     }
+    if (_files != nullptr) {
+        _CLDELETE(_files);
+    }
 
     // Make sure unused posting slots aren't attempted delete on
     if (this->postingsFreeListDW.values) {
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index d89a42a..ebb14b3 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -734,9 +734,11 @@ public:
     void setInfoStream(std::ostream *infoStream) override {}
     void setRAMBufferSizeMB(float_t mb) override {}
     void close() override {}
-    const std::vector<std::string> &files() override {
-        auto EMPTY = _CLNEW std::vector<string>;
-        return *EMPTY;
+    const std::vector<std::string>& files() override {
+        if (_files != nullptr)
+            return *_files;
+        _files = _CLNEW std::vector<string>;
+        return *_files;
     }
     void setMaxBufferedDeleteTerms(int32_t _maxBufferedDeleteTerms) override {_CLTHROW_NOT_IMPLEMENT}
     int32_t getMaxBufferedDeleteTerms() override {_CLTHROW_NOT_IMPLEMENT}
@@ -753,6 +755,9 @@ public:
     int64_t getRAMUsed() override {_CLTHROW_NOT_IMPLEMENT}
     const std::vector<int32_t> *getBufferedDeleteDocIDs() override {_CLTHROW_NOT_IMPLEMENT}
 
+private:
+    std::vector<std::string>* _files = nullptr;
+
 public:
     ThreadState *threadState;
     CL_NS(util)::ObjectArray<BufferedNorms> norms;// Holds norms until we flush
diff --git a/src/core/CLucene/index/_DocumentsWriter.h b/src/core/CLucene/index/_DocumentsWriter.h
index 9d359c2..f50d8e8 100644
--- a/src/core/CLucene/index/_DocumentsWriter.h
+++ b/src/core/CLucene/index/_DocumentsWriter.h
@@ -601,8 +601,6 @@ private:
 
     DocumentsWriter* _parent;
 
-    CL_NS(analysis)::Analyzer* analyzer_chs;
-
     ThreadState(DocumentsWriter* _parent);
     virtual ~ThreadState();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to