kangkaisen closed pull request #243: Skip unnecessary stream init and decrease 
the TCMalloc pressure (#242)
URL: https://github.com/apache/incubator-doris/pull/243
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/be/src/olap/column_file/segment_reader.cpp 
b/be/src/olap/column_file/segment_reader.cpp
index 0eb7c4d..284b89f 100644
--- a/be/src/olap/column_file/segment_reader.cpp
+++ b/be/src/olap/column_file/segment_reader.cpp
@@ -94,7 +94,7 @@ SegmentReader::~SegmentReader() {
     _file_handler.close();
 
     if (_is_data_loaded && _runtime_state != NULL) {
-        MemTracker::update_limits(_buffer_size * -1, 
_runtime_state->mem_trackers()); 
+        MemTracker::update_limits(_buffer_size * -1, 
_runtime_state->mem_trackers());
     }
 
     for (auto& it : _streams) {
@@ -248,7 +248,7 @@ OLAPStatus SegmentReader::seek_to_block(
             OLAP_LOG_WARNING("fail to read data stream");
             return res;
         }
-    
+
         OLAPStatus res = _create_reader(&_buffer_size);
         if (res != OLAP_SUCCESS) {
             OLAP_LOG_WARNING("fail to create reader");
@@ -511,7 +511,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t 
first_block, uint32_t last_b
                 --_remain_block;
 
                 if (j < _block_count - 1) {
-                    _stats->rows_stats_filtered += _num_rows_in_block; 
+                    _stats->rows_stats_filtered += _num_rows_in_block;
                 } else {
                     _stats->rows_stats_filtered +=
                         _header_message().number_of_rows() - j * 
_num_rows_in_block;
@@ -551,7 +551,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t 
first_block, uint32_t last_b
                 _include_blocks[j] = DEL_SATISFIED;
                 --_remain_block;
                 if (j < _block_count - 1) {
-                    _stats->rows_stats_filtered += _num_rows_in_block; 
+                    _stats->rows_stats_filtered += _num_rows_in_block;
                 } else {
                     _stats->rows_stats_filtered +=
                         _header_message().number_of_rows() - j * 
_num_rows_in_block;
@@ -682,7 +682,7 @@ OLAPStatus SegmentReader::_load_index(bool is_using_cache) {
                 OLAP_LOG_WARNING("fail to malloc memory. [size=%lu]", 
sizeof(StreamIndexReader));
                 return OLAP_ERR_MALLOC_ERROR;
             }
-            
+
             res = index_message->init(stream_buffer, stream_length, type, 
is_using_cache, _null_supported);
             if (OLAP_SUCCESS != res) {
                 OLAP_LOG_WARNING("init index from cahce fail");
@@ -775,6 +775,14 @@ OLAPStatus SegmentReader::_read_all_data_streams(size_t* 
buffer_size) {
             continue;
         }
 
+        //skip unnecessary stream init
+        //if the query doesn't include this column, we needn't init the stream 
for this column
+        //to reduce the call times for ByteBuffer::create and decrease the 
TCMalloc pressure
+        if (!_is_column_included(unique_column_id)) {
+            *buffer_size += OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE + 
sizeof(StreamHead);
+            continue;
+        }
+
         StreamName name(unique_column_id, message.kind());
         std::unique_ptr<ReadOnlyFileStream> stream(new(std::nothrow) 
ReadOnlyFileStream(
             &_file_handler,
@@ -845,7 +853,7 @@ OLAPStatus SegmentReader::_seek_to_block_directly(
         if (_column_indices[cid] == nullptr) {
             continue;
         }
-        
+
         OLAPStatus res = OLAP_SUCCESS;
         PositionProvider position(&_column_indices[cid]->entry(block_id));
         if (OLAP_SUCCESS != (res = _column_readers[cid]->seek(&position))) {


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

Reply via email to