This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1907153ab5092b13ed0ab0cb52b66705cc7a903f Author: Riza Suminto <[email protected]> AuthorDate: Fri Jan 3 15:27:32 2025 -0800 IMPALA-13641: Lazily init Parquet column read counters ParquetUncompressedBytesReadPerColumn and ParquetCompressedBytesReadPerColumn exist in runtime profile even when no parquet file is read (all scan text files). This patch lazily initializes those counters only if HdfsScanNodeBase::bytes_read_per_col_ is not empty. Testing: - Run and pass TestParquet::test_bytes_read_per_column. - Run TestTpcdsInsert and confirm no Parquet specific counters exist when reading TEXTFILE table. Change-Id: I8ba767b69b8c432f0eb954aa54f86876b329160c Reviewed-on: http://gerrit.cloudera.org:8080/22297 Reviewed-by: Michael Smith <[email protected]> Reviewed-by: Csaba Ringhofer <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/hdfs-scan-node-base.cc | 29 +++++++++++++++-------------- be/src/exec/hdfs-scan-node-base.h | 4 ---- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc index 51046243c..70495e01f 100644 --- a/be/src/exec/hdfs-scan-node-base.cc +++ b/be/src/exec/hdfs-scan-node-base.cc @@ -641,11 +641,6 @@ Status HdfsScanNodeBase::Open(RuntimeState* state) { initial_range_actual_reservation_stats_ = PROFILE_InitialRangeActualReservation.Instantiate(runtime_profile()); - uncompressed_bytes_read_per_column_counter_ = - PROFILE_ParquetUncompressedBytesReadPerColumn.Instantiate(runtime_profile()); - compressed_bytes_read_per_column_counter_ = - PROFILE_ParquetCompressedBytesReadPerColumn.Instantiate(runtime_profile()); - bytes_read_local_ = PROFILE_BytesReadLocal.Instantiate(runtime_profile()); bytes_read_short_circuit_ = PROFILE_BytesReadShortCircuit.Instantiate(runtime_profile()); @@ -1259,15 +1254,21 @@ void HdfsScanNodeBase::StopAndFinalizeCounters() { { shared_lock<shared_mutex> bytes_read_per_col_guard_read_lock( bytes_read_per_col_lock_); 
- for (const auto& bytes_read : bytes_read_per_col_) { - int64_t uncompressed_bytes_read = bytes_read.second.uncompressed_bytes_read.Load(); - if (uncompressed_bytes_read > 0) { - uncompressed_bytes_read_per_column_counter_->UpdateCounter( - uncompressed_bytes_read); - } - int64_t compressed_bytes_read = bytes_read.second.compressed_bytes_read.Load(); - if (compressed_bytes_read > 0) { - compressed_bytes_read_per_column_counter_->UpdateCounter(compressed_bytes_read); + if (!bytes_read_per_col_.empty()) { + auto uncompressed_bytes_counter = + PROFILE_ParquetUncompressedBytesReadPerColumn.Instantiate(runtime_profile()); + auto compressed_bytes_counter = + PROFILE_ParquetCompressedBytesReadPerColumn.Instantiate(runtime_profile()); + for (const auto& bytes_read : bytes_read_per_col_) { + int64_t uncompressed_bytes_read = + bytes_read.second.uncompressed_bytes_read.Load(); + if (uncompressed_bytes_read > 0) { + uncompressed_bytes_counter->UpdateCounter(uncompressed_bytes_read); + } + int64_t compressed_bytes_read = bytes_read.second.compressed_bytes_read.Load(); + if (compressed_bytes_read > 0) { + compressed_bytes_counter->UpdateCounter(compressed_bytes_read); + } } } } diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h index 02a157f56..ed4c6948b 100644 --- a/be/src/exec/hdfs-scan-node-base.h +++ b/be/src/exec/hdfs-scan-node-base.h @@ -785,10 +785,6 @@ class HdfsScanNodeBase : public ScanNode { RuntimeProfile::Counter* hdfs_open_file_timer_ = nullptr; RuntimeProfile::SummaryStatsCounter* initial_range_ideal_reservation_stats_ = nullptr; RuntimeProfile::SummaryStatsCounter* initial_range_actual_reservation_stats_ = nullptr; - RuntimeProfile::SummaryStatsCounter* compressed_bytes_read_per_column_counter_ = - nullptr; - RuntimeProfile::SummaryStatsCounter* uncompressed_bytes_read_per_column_counter_ = - nullptr; /// HDFS read thread concurrency bucket: bucket[i] refers to the number of sample /// taken where there are i concurrent hdfs read 
thread running. Created in Open().
