This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit f306fb046e31c7d053dda9adde8f1751890286cb Author: Pxl <[email protected]> AuthorDate: Thu Mar 31 17:05:08 2022 +0800 [Bug][Vectorized] fix core dump with HLL and some refactor of Decompressor (#8668) --- .licenserc.yaml | 1 + be/src/exec/decompressor.cpp | 4 +- be/src/exec/decompressor.h | 54 ++++++++++------------ be/src/exec/line_reader.h | 2 +- be/src/exec/lzo_decompressor.cpp | 2 - be/src/exec/plain_text_line_reader.h | 10 ++-- regression-test/common/load/bitmap_basic_agg.sql | 4 ++ regression-test/common/load/hll_basic_agg.sql | 4 ++ regression-test/common/table/bitmap_basic_agg.sql | 6 +++ regression-test/common/table/hll_basic_agg.sql | 6 +++ regression-test/data/aggregate/aggregate.out | 1 + .../data/types/complex_types/basic_agg_test.out | 11 +++++ .../types/complex_types/basic_agg_test.groovy | 21 ++++----- 13 files changed, 73 insertions(+), 53 deletions(-) diff --git a/.licenserc.yaml b/.licenserc.yaml index 40c83fd..d3614cd 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -51,6 +51,7 @@ header: - 'be/src/util/sse2neon.h' - 'be/src/util/utf8_check.cpp' - 'build-support/run_clang_format.py' + - 'regression-test/common' - 'regression-test/suites' - 'regression-test/data' diff --git a/be/src/exec/decompressor.cpp b/be/src/exec/decompressor.cpp index 2f215f4..3d56ac3 100644 --- a/be/src/exec/decompressor.cpp +++ b/be/src/exec/decompressor.cpp @@ -55,8 +55,6 @@ Status Decompressor::create_decompressor(CompressType type, Decompressor** decom return st; } -Decompressor::~Decompressor() {} - std::string Decompressor::debug_info() { return "Decompressor"; } @@ -71,7 +69,7 @@ GzipDecompressor::~GzipDecompressor() { } Status GzipDecompressor::init() { - _z_strm = {0}; + _z_strm = {nullptr}; _z_strm.zalloc = Z_NULL; _z_strm.zfree = Z_NULL; _z_strm.opaque = Z_NULL; diff --git a/be/src/exec/decompressor.h b/be/src/exec/decompressor.h index 08228da..55bdd03 100644 --- a/be/src/exec/decompressor.h +++ b/be/src/exec/decompressor.h @@ -34,7 +34,7 @@ enum CompressType { UNCOMPRESSED, GZIP, DEFLATE, BZIP2, LZ4FRAME, LZOP }; class Decompressor { public: - virtual ~Decompressor(); + virtual ~Decompressor() = default; // implement in derived class // input(in): buf where decompress begin @@ -71,19 +71,18 @@ protected: class GzipDecompressor : public Decompressor { public: - virtual ~GzipDecompressor(); + ~GzipDecompressor() override; - virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, size_t* decompressed_len, - bool* stream_end, size_t* more_input_bytes, - size_t* more_output_bytes) override; + Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, uint8_t* output, + size_t output_max_len, size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) override; - virtual std::string debug_info() override; + std::string debug_info() override; private: friend class Decompressor; GzipDecompressor(bool is_deflate); - virtual Status init() override; + Status init() override; private: bool _is_deflate; @@ -97,19 +96,18 @@ private: class Bzip2Decompressor : public Decompressor { public: - virtual ~Bzip2Decompressor(); + ~Bzip2Decompressor() override; - virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, size_t* decompressed_len, - bool* stream_end, size_t* more_input_bytes, - size_t* more_output_bytes) override; + Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, uint8_t* output, + size_t output_max_len, size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) override; - virtual std::string debug_info() override; + std::string debug_info() override; private: friend class Decompressor; Bzip2Decompressor() : Decompressor(CompressType::BZIP2) {} - virtual Status init() override; + Status init() override; private: bz_stream _bz_strm; @@ -117,19 +115,18 @@ private: class Lz4FrameDecompressor : public Decompressor { public: - virtual ~Lz4FrameDecompressor(); + ~Lz4FrameDecompressor() override; - virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, size_t* decompressed_len, - bool* stream_end, size_t* more_input_bytes, - size_t* more_output_bytes) override; + Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, uint8_t* output, + size_t output_max_len, size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) override; - virtual std::string debug_info() override; + std::string debug_info() override; private: friend class Decompressor; Lz4FrameDecompressor() : Decompressor(CompressType::LZ4FRAME) {} - virtual Status init() override; + Status init() override; size_t get_block_size(const LZ4F_frameInfo_t* info); @@ -142,20 +139,19 @@ private: #ifdef DORIS_WITH_LZO class LzopDecompressor : public Decompressor { public: - virtual ~LzopDecompressor(); + ~LzopDecompressor() override = default; - virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, - uint8_t* output, size_t output_max_len, size_t* decompressed_len, - bool* stream_end, size_t* more_input_bytes, - size_t* more_output_bytes) override; + Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read, uint8_t* output, + size_t output_max_len, size_t* decompressed_len, bool* stream_end, + size_t* more_input_bytes, size_t* more_output_bytes) override; - virtual std::string debug_info() override; + std::string debug_info() override; private: friend class Decompressor; LzopDecompressor() : Decompressor(CompressType::LZOP), _header_info({0}), _is_header_loaded(false) {} - virtual Status init() override; + Status init() override; private: enum LzoChecksum { CHECK_NONE, CHECK_CRC32, CHECK_ADLER }; diff --git a/be/src/exec/line_reader.h b/be/src/exec/line_reader.h index 06450ed..80a8f94 100644 --- a/be/src/exec/line_reader.h +++ b/be/src/exec/line_reader.h @@ -24,7 +24,7 @@ namespace doris { // This class is used for CSV scanner, to read content line by line class LineReader { public: - virtual ~LineReader() {} + virtual ~LineReader() = default; virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) = 0; virtual void close() = 0; diff --git a/be/src/exec/lzo_decompressor.cpp b/be/src/exec/lzo_decompressor.cpp index 4704299..43c0a04 100644 --- a/be/src/exec/lzo_decompressor.cpp +++ b/be/src/exec/lzo_decompressor.cpp @@ -48,8 +48,6 @@ const uint64_t LzopDecompressor::F_ADLER32_C = 0x00000002L; const uint64_t LzopDecompressor::F_CRC32_D = 0x00000100L; const uint64_t LzopDecompressor::F_ADLER32_D = 0x00000001L; -LzopDecompressor::~LzopDecompressor() {} - Status LzopDecompressor::init() { return Status::OK(); } diff --git a/be/src/exec/plain_text_line_reader.h b/be/src/exec/plain_text_line_reader.h index 2b8aad6..c0fb06a 100644 --- a/be/src/exec/plain_text_line_reader.h +++ b/be/src/exec/plain_text_line_reader.h @@ -32,18 +32,18 @@ public: Decompressor* decompressor, size_t length, const std::string& line_delimiter, size_t line_delimiter_length); - virtual ~PlainTextLineReader(); + ~PlainTextLineReader() override; - virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) override; + Status read_line(const uint8_t** ptr, size_t* size, bool* eof) override; - virtual void close() override; + void close() override; private: bool update_eof(); - inline size_t output_buf_read_remaining() { return _output_buf_limit - _output_buf_pos; } + inline size_t output_buf_read_remaining() const { return _output_buf_limit - _output_buf_pos; } - inline size_t input_buf_read_remaining() { return _input_buf_limit - _input_buf_pos; } + inline size_t input_buf_read_remaining() const { return _input_buf_limit - _input_buf_pos; } inline bool done() { return _file_eof && output_buf_read_remaining() == 0; } diff --git a/regression-test/common/load/bitmap_basic_agg.sql b/regression-test/common/load/bitmap_basic_agg.sql new file mode 100644 index 0000000..a0dc336 --- /dev/null +++ b/regression-test/common/load/bitmap_basic_agg.sql @@ -0,0 +1,4 @@ +insert into bitmap_basic_agg values +(1,bitmap_empty()), +(2,bitmap_hash(0)),(2,bitmap_hash(0)), +(3,bitmap_hash(0)),(3,bitmap_hash(1)); diff --git a/regression-test/common/load/hll_basic_agg.sql b/regression-test/common/load/hll_basic_agg.sql new file mode 100644 index 0000000..ae92522 --- /dev/null +++ b/regression-test/common/load/hll_basic_agg.sql @@ -0,0 +1,4 @@ +insert into hll_basic_agg values +(1,hll_empty()), +(2,hll_hash(0)),(2,hll_hash(0)), +(3,hll_hash(0)),(3,hll_hash(1)); diff --git a/regression-test/common/table/bitmap_basic_agg.sql b/regression-test/common/table/bitmap_basic_agg.sql new file mode 100644 index 0000000..a8e97d4 --- /dev/null +++ b/regression-test/common/table/bitmap_basic_agg.sql @@ -0,0 +1,6 @@ +create TABLE `bitmap_basic_agg` ( + `k1` int(11) NULL, + `k2` bitmap BITMAP_UNION NULL +)AGGREGATE KEY(`k1`) +DISTRIBUTED BY HASH(`k1`) BUCKETS 1 +PROPERTIES("replication_num" = "1"); diff --git a/regression-test/common/table/hll_basic_agg.sql b/regression-test/common/table/hll_basic_agg.sql new file mode 100644 index 0000000..e0d1b8e --- /dev/null +++ b/regression-test/common/table/hll_basic_agg.sql @@ -0,0 +1,6 @@ +create TABLE `hll_basic_agg` ( + `k1` int(11) NULL, + `k2` hll HLL_UNION NULL +)AGGREGATE KEY(`k1`) +DISTRIBUTED BY HASH(`k1`) BUCKETS 1 +PROPERTIES("replication_num" = "1"); diff --git a/regression-test/data/aggregate/aggregate.out b/regression-test/data/aggregate/aggregate.out index 964b083..b035863 100644 --- a/regression-test/data/aggregate/aggregate.out +++ b/regression-test/data/aggregate/aggregate.out @@ -73,3 +73,4 @@ TESTING AGAIN -- !aggregate -- 9223845.04 1607.2585798911111 + diff --git a/regression-test/data/types/complex_types/basic_agg_test.out b/regression-test/data/types/complex_types/basic_agg_test.out new file mode 100644 index 0000000..057cc00 --- /dev/null +++ b/regression-test/data/types/complex_types/basic_agg_test.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql_bitmap -- +1 \N +2 \N +3 \N + +-- !sql_hll -- +1 \N +2 \N +3 \N + diff --git a/be/src/exec/line_reader.h b/regression-test/suites/types/complex_types/basic_agg_test.groovy similarity index 68% copy from be/src/exec/line_reader.h copy to regression-test/suites/types/complex_types/basic_agg_test.groovy index 06450ed..c489d94 100644 --- a/be/src/exec/line_reader.h +++ b/regression-test/suites/types/complex_types/basic_agg_test.groovy @@ -15,19 +15,14 @@ // specific language governing permissions and limitations // under the License. -#pragma once +def tables=["bitmap_basic_agg","hll_basic_agg"] -#include "common/status.h" +for (String table in tables) { + sql """drop table if exists ${table};""" + sql new File("""regression-test/common/table/${table}.sql""").text + sql new File("""regression-test/common/load/${table}.sql""").text +} -namespace doris { +qt_sql_bitmap """select * from bitmap_basic_agg;""" -// This class is used for CSV scanner, to read content line by line -class LineReader { -public: - virtual ~LineReader() {} - virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) = 0; - - virtual void close() = 0; -}; - -} // namespace doris +qt_sql_hll """select * from hll_basic_agg;""" --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
