This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 54b4412f1c7 [Fix](inverted index) fix memory leak in inverted index writer for array values (#36144)
54b4412f1c7 is described below

commit 54b4412f1c7885f4325e30cdef0d923e2331ea88
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 12 14:32:31 2024 +0800

    [Fix](inverted index) fix memory leak in inverted index writer for array values (#36144)
    
    Add an ownership flag to the Field's TokenStream value and to the
    Analyzer's Reader value, and free them when the tokenizer is finalized.
---
 be/src/clucene                                          | 2 +-
 be/src/olap/rowset/segment_v2/inverted_index_writer.cpp | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index 1936207ae43..25324632bab 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit 1936207ae4345e45fcb2b4e6b1ed1a318f8c1b1b
+Subproject commit 25324632babc0e5da28048ebbe9adcbdfc73c281
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index c268b6e12ce..ffbd90c7173 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -404,12 +404,15 @@ public:
                             _parser_type != 
InvertedIndexParserType::PARSER_NONE) {
                             // in this case stream need to delete after 
add_document, because the
                             // stream can not reuse for different field
+                            bool own_token_stream = true;
+                            bool own_reader = true;
                             std::unique_ptr<lucene::util::Reader> 
char_string_reader = nullptr;
                             
RETURN_IF_ERROR(create_char_string_reader(char_string_reader));
                             char_string_reader->init(v->get_data(), 
v->get_size(), false);
                             ts = _analyzer->tokenStream(new_field->name(),
                                                         
char_string_reader.release());
-                            new_field->setValue(ts);
+                            _analyzer->set_ownReader(own_reader);
+                            new_field->setValue(ts, own_token_stream);
                         } else {
                             new_field_char_value(v->get_data(), v->get_size(), 
new_field);
                         }
@@ -421,7 +424,6 @@ public:
                     // if this array is null, we just ignore to write inverted 
index
                     RETURN_IF_ERROR(add_document());
                     _doc->clear();
-                    _CLDELETE(ts);
                 } else {
                     // avoid to add doc which without any field which may make 
threadState init skip
                     // init fieldDataArray, then will make error with next doc 
with fields in
@@ -435,7 +437,6 @@ public:
                     _doc->add(*new_field);
                     RETURN_IF_ERROR(add_null_document());
                     _doc->clear();
-                    _CLDELETE(ts);
                 }
                 _rid++;
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to