(impala) 01/03: IMPALA-13567: Update RowsRead counter more frequently

asherman Tue, 26 Nov 2024 15:00:38 -0800

This is an automated email from the ASF dual-hosted git repository.

asherman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 0677b8a0bab5288713d2e1a00ff5ec3123ab284c
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Nov 22 10:38:21 2024 -0800

    IMPALA-13567: Update RowsRead counter more frequently
    
    HdfsColumnarScanner implementations update the RowsRead counter near the
    very end of the AssembleRows function. Ideally, RowsRead should be
    incremented more frequently: every time before calling
    TransferScratchTuples, where conjunct and runtime filter evaluation
    happen. That way, RowsRead is increased even if all rows in the scratch
    batch are filtered out by conjuncts or runtime filters.
    
    This patch moves the counter increment to just before
    TransferScratchTuples.
    
    Testing:
    - Pass core tests.
    
    Change-Id: I17f1c40a0e790750ffbd4e987ca181b82cc14c40
    Reviewed-on: http://gerrit.cloudera.org:8080/22102
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/orc/hdfs-orc-scanner.cc         | 2 +-
 be/src/exec/parquet/hdfs-parquet-scanner.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/be/src/exec/orc/hdfs-orc-scanner.cc 
b/be/src/exec/orc/hdfs-orc-scanner.cc
index 631c89ea8..a8c15126d 100644
--- a/be/src/exec/orc/hdfs-orc-scanner.cc
+++ b/be/src/exec/orc/hdfs-orc-scanner.cc
@@ -1005,6 +1005,7 @@ Status HdfsOrcScanner::AssembleRows(RowBatch* row_batch) {
         end_of_stripe_ = true;
         return Status::OK();
       }
+      COUNTER_ADD(scan_node_->rows_read_counter(), 
orc_root_batch_->numElements);
       num_rows_read += orc_root_batch_->numElements;
     }
 
@@ -1013,7 +1014,6 @@ Status HdfsOrcScanner::AssembleRows(RowBatch* row_batch) {
     continue_execution &= !scan_node_->ReachedLimitShared() && 
!context_->cancelled();
   }
   stripe_rows_read_ += num_rows_read;
-  COUNTER_ADD(scan_node_->rows_read_counter(), num_rows_read);
   // Merge Scanner-local counter into HdfsScanNode counter and reset.
   COUNTER_ADD(scan_node_->collection_items_read_counter(), 
coll_items_read_counter_);
   coll_items_read_counter_ = 0;
diff --git a/be/src/exec/parquet/hdfs-parquet-scanner.cc 
b/be/src/exec/parquet/hdfs-parquet-scanner.cc
index ad9d0c3ed..9e33ff360 100644
--- a/be/src/exec/parquet/hdfs-parquet-scanner.cc
+++ b/be/src/exec/parquet/hdfs-parquet-scanner.cc
@@ -2328,13 +2328,13 @@ Status 
HdfsParquetScanner::AssembleRowsWithoutLateMaterialization(
       last_num_tuples = scratch_batch_->num_tuples;
     }
     RETURN_IF_ERROR(CheckPageFiltering());
+    COUNTER_ADD(scan_node_->rows_read_counter(), scratch_batch_->num_tuples);
     num_rows_read += scratch_batch_->num_tuples;
     int num_row_to_commit = TransferScratchTuples(row_batch);
     RETURN_IF_ERROR(CommitRows(row_batch, num_row_to_commit));
     if (row_batch->AtCapacity()) break;
   }
   row_group_rows_read_ += num_rows_read;
-  COUNTER_ADD(scan_node_->rows_read_counter(), num_rows_read);
   // Merge Scanner-local counter into HdfsScanNode counter and reset.
   COUNTER_ADD(scan_node_->collection_items_read_counter(), 
coll_items_read_counter_);
   coll_items_read_counter_ = 0;

(impala) 01/03: IMPALA-13567: Update RowsRead counter more frequently

Reply via email to