This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1825002abc06494b9f9a2b5fc004ccf321cfe598
Author: spaces-x <[email protected]>
AuthorDate: Thu Jan 5 10:20:32 2023 +0800

    [Enhancement](SparkLoad): avoid BE OOM in push task, fix #15572 (#15620)
    
    Release memory pool held by the parquet reader when the data has been 
flushed by rowset writter.
    Co-authored-by: spaces-x <[email protected]>
---
 be/src/common/config.h          |  2 ++
 be/src/exec/parquet_scanner.cpp |  1 +
 be/src/olap/push_handler.cpp    | 15 +++++++++++----
 be/src/olap/push_handler.h      |  1 +
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index a94fbe6c92..025d682072 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -503,6 +503,8 @@ CONF_mInt64(write_buffer_size, "209715200");
 
 // max buffer size used in memtable for the aggregated table
 CONF_mInt64(memtable_max_buffer_size, "419430400");
+// write buffer size in push task for sparkload, default 1GB
+CONF_mInt64(flush_size_for_sparkload, "1073741824");
 
 // following 2 configs limit the memory consumption of load process on a 
Backend.
 // eg: memory limit to 80% of mem limit config but up to 100GB(default)
diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp
index e6da51f71d..7e3dda4712 100644
--- a/be/src/exec/parquet_scanner.cpp
+++ b/be/src/exec/parquet_scanner.cpp
@@ -65,6 +65,7 @@ Status ParquetScanner::get_next(Tuple* tuple, MemPool* 
tuple_pool, bool* eof, bo
 
         COUNTER_UPDATE(_rows_read_counter, 1);
         SCOPED_TIMER(_materialize_timer);
+        // TODO(weixiang): check whether shallow copy is enough
         RETURN_IF_ERROR(fill_dest_tuple(tuple, tuple_pool, fill_tuple));
         break; // break always
     }
diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp
index e6b858015a..6a33fbb548 100644
--- a/be/src/olap/push_handler.cpp
+++ b/be/src/olap/push_handler.cpp
@@ -237,13 +237,18 @@ Status PushHandler::_convert_v2(TabletSharedPtr 
cur_tablet, RowsetSharedPtr* cur
             }
 
             // 3. Init Row
-            uint8_t* tuple_buf = 
reader->mem_pool()->allocate(schema->schema_size());
-            ContiguousRow row(schema.get(), tuple_buf);
+            std::unique_ptr<uint8_t[]> tuple_buf(new 
uint8_t[schema->schema_size()]);
+            ContiguousRow row(schema.get(), tuple_buf.get());
 
             // 4. Read data from broker and write into SegmentGroup of 
cur_tablet
             // Convert from raw to delta
             VLOG_NOTICE << "start to convert etl file to delta.";
             while (!reader->eof()) {
+                if (reader->mem_pool()->mem_tracker()->consumption() >
+                    config::flush_size_for_sparkload) {
+                    RETURN_NOT_OK(rowset_writer->flush());
+                    reader->mem_pool()->free_all();
+                }
                 res = reader->next(&row);
                 if (!res.ok()) {
                     LOG(WARNING) << "read next row failed."
@@ -814,7 +819,9 @@ Status PushBrokerReader::init(const Schema* schema, const 
TBrokerScanRange& t_sc
     }
     _runtime_profile = _runtime_state->runtime_profile();
     _runtime_profile->set_name("PushBrokerReader");
-    _mem_pool.reset(new MemPool());
+    _mem_pool.reset(new MemPool(_runtime_state->scanner_mem_tracker().get()));
+    _tuple_buffer_pool.reset(new 
MemPool(_runtime_state->scanner_mem_tracker().get()));
+
     _counter.reset(new ScannerCounter());
 
     // init scanner
@@ -846,7 +853,7 @@ Status PushBrokerReader::init(const Schema* schema, const 
TBrokerScanRange& t_sc
     }
 
     int tuple_buffer_size = _tuple_desc->byte_size();
-    void* tuple_buffer = _mem_pool->allocate(tuple_buffer_size);
+    void* tuple_buffer = _tuple_buffer_pool->allocate(tuple_buffer_size);
     if (tuple_buffer == nullptr) {
         LOG(WARNING) << "Allocate memory for tuple failed";
         return Status::OLAPInternalError(OLAP_ERR_PUSH_INIT_ERROR);
diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h
index 02384e9f2d..f24d837651 100644
--- a/be/src/olap/push_handler.h
+++ b/be/src/olap/push_handler.h
@@ -206,6 +206,7 @@ private:
     std::unique_ptr<RuntimeState> _runtime_state;
     RuntimeProfile* _runtime_profile;
     std::unique_ptr<MemPool> _mem_pool;
+    std::unique_ptr<MemPool> _tuple_buffer_pool;
     std::unique_ptr<ScannerCounter> _counter;
     std::unique_ptr<BaseScanner> _scanner;
     // Not used, just for placeholding


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to