This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d2fe24308b6 [Fix](parquet-reader) Fix definition level rle decode dead
loop in parquet-reader. (#39523)
d2fe24308b6 is described below
commit d2fe24308b64ddf592d72362aab110e346e985c1
Author: Qi Chen <[email protected]>
AuthorDate: Mon Aug 26 23:26:53 2024 +0800
[Fix](parquet-reader) Fix definition level rle decode dead loop in
parquet-reader. (#39523)
---
be/src/util/bit_stream_utils.h | 4 ++++
be/src/util/rle_encoding.h | 2 ++
be/src/vec/exec/format/parquet/level_decoder.h | 4 +++-
.../exec/format/parquet/vparquet_column_reader.cpp | 20 ++++++++++++++++++--
4 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/be/src/util/bit_stream_utils.h b/be/src/util/bit_stream_utils.h
index 550919440a8..b9b3621cf8b 100644
--- a/be/src/util/bit_stream_utils.h
+++ b/be/src/util/bit_stream_utils.h
@@ -145,6 +145,10 @@ public:
bool is_initialized() const { return buffer_ != nullptr; }
+ const uint8_t* buffer() const { return buffer_; }
+
+ int max_bytes() const { return max_bytes_; }
+
private:
// Used by SeekToBit() and GetValue() to fetch the
// the next word into buffer_.
diff --git a/be/src/util/rle_encoding.h b/be/src/util/rle_encoding.h
index be4df12916b..206349b4728 100644
--- a/be/src/util/rle_encoding.h
+++ b/be/src/util/rle_encoding.h
@@ -120,6 +120,8 @@ public:
// Get current repeated value, make sure that count equals repeated_count()
T get_repeated_value(size_t count);
+ const BitReader& bit_reader() const { return bit_reader_; }
+
private:
bool ReadHeader();
diff --git a/be/src/vec/exec/format/parquet/level_decoder.h b/be/src/vec/exec/format/parquet/level_decoder.h
index 4f76ac06837..de2f80d7f12 100644
--- a/be/src/vec/exec/format/parquet/level_decoder.h
+++ b/be/src/vec/exec/format/parquet/level_decoder.h
@@ -56,6 +56,8 @@ public:
inline void rewind_one() { _rle_decoder.RewindOne(); }
+ const RleDecoder<level_t>& rle_decoder() const { return _rle_decoder; }
+
private:
tparquet::Encoding::type _encoding;
level_t _bit_width = 0;
@@ -65,4 +67,4 @@ private:
BitReader _bit_packed_decoder;
};
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index c51a51bac3c..c31c63ee87c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -209,7 +209,15 @@ Status ScalarColumnReader::_skip_values(size_t num_values) {
level_t def_level = -1;
size_t loop_skip = def_decoder.get_next_run(&def_level, num_values - skipped);
if (loop_skip == 0) {
- continue;
+ std::stringstream ss;
+ auto& bit_reader = def_decoder.rle_decoder().bit_reader();
+ ss << "def_decoder buffer (hex): ";
+ for (size_t i = 0; i < bit_reader.max_bytes(); ++i) {
+ ss << std::hex << std::setw(2) << std::setfill('0')
+ << static_cast<int>(bit_reader.buffer()[i]) << " ";
+ }
+ LOG(WARNING) << ss.str();
+ return Status::InternalError("Failed to decode definition level.");
}
if (def_level == 0) {
null_size += loop_skip;
@@ -254,7 +262,15 @@ Status ScalarColumnReader::_read_values(size_t num_values, ColumnPtr& doris_colu
level_t def_level;
size_t loop_read = def_decoder.get_next_run(&def_level, num_values - has_read);
if (loop_read == 0) {
- continue;
+ std::stringstream ss;
+ auto& bit_reader = def_decoder.rle_decoder().bit_reader();
+ ss << "def_decoder buffer (hex): ";
+ for (size_t i = 0; i < bit_reader.max_bytes(); ++i) {
+ ss << std::hex << std::setw(2) << std::setfill('0')
+ << static_cast<int>(bit_reader.buffer()[i]) << " ";
+ }
+ LOG(WARNING) << ss.str();
+ return Status::InternalError("Failed to decode definition level.");
}
bool is_null = def_level == 0;
if (!(prev_is_null ^ is_null)) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]