This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 01cc057 [Bug][Vectorized] fix core dump with HLL and some refactor of Decompressor (#8668)
01cc057 is described below
commit 01cc0573aa13d99d24a2d86e1dbec73704e1012e
Author: Pxl <[email protected]>
AuthorDate: Thu Mar 31 17:05:08 2022 +0800
[Bug][Vectorized] fix core dump with HLL and some refactor of Decompressor (#8668)
---
.licenserc.yaml | 1 +
be/src/exec/decompressor.cpp | 4 +-
be/src/exec/decompressor.h | 54 ++++++++++------------
be/src/exec/line_reader.h | 2 +-
be/src/exec/lzo_decompressor.cpp | 2 -
be/src/exec/plain_text_line_reader.h | 10 ++--
be/src/vec/data_types/data_type_factory.cpp | 5 ++
regression-test/common/load/bitmap_basic_agg.sql | 4 ++
regression-test/common/load/hll_basic_agg.sql | 4 ++
regression-test/common/table/bitmap_basic_agg.sql | 6 +++
regression-test/common/table/hll_basic_agg.sql | 6 +++
regression-test/data/aggregate/aggregate.out | 1 +
.../data/demo/select_union_all_action.out | 1 +
regression-test/data/demo/thread_action.out | 4 +-
.../complex_types/basic_agg_test.out} | 12 +++--
.../types/complex_types/basic_agg_test.groovy | 21 ++++-----
16 files changed, 78 insertions(+), 59 deletions(-)
diff --git a/.licenserc.yaml b/.licenserc.yaml
index f65094d..b9a6a6e 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -52,6 +52,7 @@ header:
- 'be/src/util/sse2neon.h'
- 'be/src/util/utf8_check.cpp'
- 'build-support/run_clang_format.py'
+ - 'regression-test/common'
- 'regression-test/suites'
- 'regression-test/data'
diff --git a/be/src/exec/decompressor.cpp b/be/src/exec/decompressor.cpp
index 2f215f4..3d56ac3 100644
--- a/be/src/exec/decompressor.cpp
+++ b/be/src/exec/decompressor.cpp
@@ -55,8 +55,6 @@ Status Decompressor::create_decompressor(CompressType type,
Decompressor** decom
return st;
}
-Decompressor::~Decompressor() {}
-
std::string Decompressor::debug_info() {
return "Decompressor";
}
@@ -71,7 +69,7 @@ GzipDecompressor::~GzipDecompressor() {
}
Status GzipDecompressor::init() {
- _z_strm = {0};
+ _z_strm = {nullptr};
_z_strm.zalloc = Z_NULL;
_z_strm.zfree = Z_NULL;
_z_strm.opaque = Z_NULL;
diff --git a/be/src/exec/decompressor.h b/be/src/exec/decompressor.h
index 08228da..55bdd03 100644
--- a/be/src/exec/decompressor.h
+++ b/be/src/exec/decompressor.h
@@ -34,7 +34,7 @@ enum CompressType { UNCOMPRESSED, GZIP, DEFLATE, BZIP2,
LZ4FRAME, LZOP };
class Decompressor {
public:
- virtual ~Decompressor();
+ virtual ~Decompressor() = default;
// implement in derived class
// input(in): buf where decompress begin
@@ -71,19 +71,18 @@ protected:
class GzipDecompressor : public Decompressor {
public:
- virtual ~GzipDecompressor();
+ ~GzipDecompressor() override;
- virtual Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read,
- uint8_t* output, size_t output_max_len, size_t*
decompressed_len,
- bool* stream_end, size_t* more_input_bytes,
- size_t* more_output_bytes) override;
+ Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read, uint8_t* output,
+ size_t output_max_len, size_t* decompressed_len, bool*
stream_end,
+ size_t* more_input_bytes, size_t* more_output_bytes)
override;
- virtual std::string debug_info() override;
+ std::string debug_info() override;
private:
friend class Decompressor;
GzipDecompressor(bool is_deflate);
- virtual Status init() override;
+ Status init() override;
private:
bool _is_deflate;
@@ -97,19 +96,18 @@ private:
class Bzip2Decompressor : public Decompressor {
public:
- virtual ~Bzip2Decompressor();
+ ~Bzip2Decompressor() override;
- virtual Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read,
- uint8_t* output, size_t output_max_len, size_t*
decompressed_len,
- bool* stream_end, size_t* more_input_bytes,
- size_t* more_output_bytes) override;
+ Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read, uint8_t* output,
+ size_t output_max_len, size_t* decompressed_len, bool*
stream_end,
+ size_t* more_input_bytes, size_t* more_output_bytes)
override;
- virtual std::string debug_info() override;
+ std::string debug_info() override;
private:
friend class Decompressor;
Bzip2Decompressor() : Decompressor(CompressType::BZIP2) {}
- virtual Status init() override;
+ Status init() override;
private:
bz_stream _bz_strm;
@@ -117,19 +115,18 @@ private:
class Lz4FrameDecompressor : public Decompressor {
public:
- virtual ~Lz4FrameDecompressor();
+ ~Lz4FrameDecompressor() override;
- virtual Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read,
- uint8_t* output, size_t output_max_len, size_t*
decompressed_len,
- bool* stream_end, size_t* more_input_bytes,
- size_t* more_output_bytes) override;
+ Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read, uint8_t* output,
+ size_t output_max_len, size_t* decompressed_len, bool*
stream_end,
+ size_t* more_input_bytes, size_t* more_output_bytes)
override;
- virtual std::string debug_info() override;
+ std::string debug_info() override;
private:
friend class Decompressor;
Lz4FrameDecompressor() : Decompressor(CompressType::LZ4FRAME) {}
- virtual Status init() override;
+ Status init() override;
size_t get_block_size(const LZ4F_frameInfo_t* info);
@@ -142,20 +139,19 @@ private:
#ifdef DORIS_WITH_LZO
class LzopDecompressor : public Decompressor {
public:
- virtual ~LzopDecompressor();
+ ~LzopDecompressor() override = default;
- virtual Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read,
- uint8_t* output, size_t output_max_len, size_t*
decompressed_len,
- bool* stream_end, size_t* more_input_bytes,
- size_t* more_output_bytes) override;
+ Status decompress(uint8_t* input, size_t input_len, size_t*
input_bytes_read, uint8_t* output,
+ size_t output_max_len, size_t* decompressed_len, bool*
stream_end,
+ size_t* more_input_bytes, size_t* more_output_bytes)
override;
- virtual std::string debug_info() override;
+ std::string debug_info() override;
private:
friend class Decompressor;
LzopDecompressor()
: Decompressor(CompressType::LZOP), _header_info({0}),
_is_header_loaded(false) {}
- virtual Status init() override;
+ Status init() override;
private:
enum LzoChecksum { CHECK_NONE, CHECK_CRC32, CHECK_ADLER };
diff --git a/be/src/exec/line_reader.h b/be/src/exec/line_reader.h
index 06450ed..80a8f94 100644
--- a/be/src/exec/line_reader.h
+++ b/be/src/exec/line_reader.h
@@ -24,7 +24,7 @@ namespace doris {
// This class is used for CSV scanner, to read content line by line
class LineReader {
public:
- virtual ~LineReader() {}
+ virtual ~LineReader() = default;
virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) = 0;
virtual void close() = 0;
diff --git a/be/src/exec/lzo_decompressor.cpp b/be/src/exec/lzo_decompressor.cpp
index 4704299..43c0a04 100644
--- a/be/src/exec/lzo_decompressor.cpp
+++ b/be/src/exec/lzo_decompressor.cpp
@@ -48,8 +48,6 @@ const uint64_t LzopDecompressor::F_ADLER32_C = 0x00000002L;
const uint64_t LzopDecompressor::F_CRC32_D = 0x00000100L;
const uint64_t LzopDecompressor::F_ADLER32_D = 0x00000001L;
-LzopDecompressor::~LzopDecompressor() {}
-
Status LzopDecompressor::init() {
return Status::OK();
}
diff --git a/be/src/exec/plain_text_line_reader.h
b/be/src/exec/plain_text_line_reader.h
index 2b8aad6..c0fb06a 100644
--- a/be/src/exec/plain_text_line_reader.h
+++ b/be/src/exec/plain_text_line_reader.h
@@ -32,18 +32,18 @@ public:
Decompressor* decompressor, size_t length,
const std::string& line_delimiter, size_t
line_delimiter_length);
- virtual ~PlainTextLineReader();
+ ~PlainTextLineReader() override;
- virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof)
override;
+ Status read_line(const uint8_t** ptr, size_t* size, bool* eof) override;
- virtual void close() override;
+ void close() override;
private:
bool update_eof();
- inline size_t output_buf_read_remaining() { return _output_buf_limit -
_output_buf_pos; }
+ inline size_t output_buf_read_remaining() const { return _output_buf_limit
- _output_buf_pos; }
- inline size_t input_buf_read_remaining() { return _input_buf_limit -
_input_buf_pos; }
+ inline size_t input_buf_read_remaining() const { return _input_buf_limit -
_input_buf_pos; }
inline bool done() { return _file_eof && output_buf_read_remaining() == 0;
}
diff --git a/be/src/vec/data_types/data_type_factory.cpp
b/be/src/vec/data_types/data_type_factory.cpp
index 3190963..29b00a6 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -20,6 +20,8 @@
#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_hll.h"
+
namespace doris::vectorized {
DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) {
@@ -239,6 +241,9 @@ DataTypePtr DataTypeFactory::create_data_type(const
PColumnMeta& pcolumn) {
case PGenericType::BITMAP:
nested = std::make_shared<DataTypeBitMap>();
break;
+ case PGenericType::HLL:
+ nested = std::make_shared<DataTypeHLL>();
+ break;
case PGenericType::LIST:
DCHECK(pcolumn.children_size() == 1);
nested =
std::make_shared<DataTypeArray>(create_data_type(pcolumn.children(0)));
diff --git a/regression-test/common/load/bitmap_basic_agg.sql
b/regression-test/common/load/bitmap_basic_agg.sql
new file mode 100644
index 0000000..a0dc336
--- /dev/null
+++ b/regression-test/common/load/bitmap_basic_agg.sql
@@ -0,0 +1,4 @@
+insert into bitmap_basic_agg values
+(1,bitmap_empty()),
+(2,bitmap_hash(0)),(2,bitmap_hash(0)),
+(3,bitmap_hash(0)),(3,bitmap_hash(1));
diff --git a/regression-test/common/load/hll_basic_agg.sql
b/regression-test/common/load/hll_basic_agg.sql
new file mode 100644
index 0000000..ae92522
--- /dev/null
+++ b/regression-test/common/load/hll_basic_agg.sql
@@ -0,0 +1,4 @@
+insert into hll_basic_agg values
+(1,hll_empty()),
+(2,hll_hash(0)),(2,hll_hash(0)),
+(3,hll_hash(0)),(3,hll_hash(1));
diff --git a/regression-test/common/table/bitmap_basic_agg.sql
b/regression-test/common/table/bitmap_basic_agg.sql
new file mode 100644
index 0000000..a8e97d4
--- /dev/null
+++ b/regression-test/common/table/bitmap_basic_agg.sql
@@ -0,0 +1,6 @@
+create TABLE `bitmap_basic_agg` (
+ `k1` int(11) NULL,
+ `k2` bitmap BITMAP_UNION NULL
+)AGGREGATE KEY(`k1`)
+DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+PROPERTIES("replication_num" = "1");
diff --git a/regression-test/common/table/hll_basic_agg.sql
b/regression-test/common/table/hll_basic_agg.sql
new file mode 100644
index 0000000..e0d1b8e
--- /dev/null
+++ b/regression-test/common/table/hll_basic_agg.sql
@@ -0,0 +1,6 @@
+create TABLE `hll_basic_agg` (
+ `k1` int(11) NULL,
+ `k2` hll HLL_UNION NULL
+)AGGREGATE KEY(`k1`)
+DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+PROPERTIES("replication_num" = "1");
diff --git a/regression-test/data/aggregate/aggregate.out
b/regression-test/data/aggregate/aggregate.out
index 964b083..b035863 100644
--- a/regression-test/data/aggregate/aggregate.out
+++ b/regression-test/data/aggregate/aggregate.out
@@ -73,3 +73,4 @@ TESTING AGAIN
-- !aggregate --
9223845.04 1607.2585798911111
+
diff --git a/regression-test/data/demo/select_union_all_action.out
b/regression-test/data/demo/select_union_all_action.out
index 7cdd5df..e3b3bdc 100644
--- a/regression-test/data/demo/select_union_all_action.out
+++ b/regression-test/data/demo/select_union_all_action.out
@@ -8,3 +8,4 @@
0 abc
1 123
2 \N
+
diff --git a/regression-test/data/demo/thread_action.out
b/regression-test/data/demo/thread_action.out
index 4f14b39..dd9024b 100644
--- a/regression-test/data/demo/thread_action.out
+++ b/regression-test/data/demo/thread_action.out
@@ -1,7 +1,7 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
--- !diffrent_tag1 --
+-- !diffrent_tag2 --
100
--- !diffrent_tag2 --
+-- !diffrent_tag1 --
100
diff --git a/regression-test/data/demo/thread_action.out
b/regression-test/data/types/complex_types/basic_agg_test.out
similarity index 60%
copy from regression-test/data/demo/thread_action.out
copy to regression-test/data/types/complex_types/basic_agg_test.out
index 4f14b39..057cc00 100644
--- a/regression-test/data/demo/thread_action.out
+++ b/regression-test/data/types/complex_types/basic_agg_test.out
@@ -1,7 +1,11 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
--- !diffrent_tag1 --
-100
+-- !sql_bitmap --
+1 \N
+2 \N
+3 \N
--- !diffrent_tag2 --
-100
+-- !sql_hll --
+1 \N
+2 \N
+3 \N
diff --git a/be/src/exec/line_reader.h
b/regression-test/suites/types/complex_types/basic_agg_test.groovy
similarity index 68%
copy from be/src/exec/line_reader.h
copy to regression-test/suites/types/complex_types/basic_agg_test.groovy
index 06450ed..c489d94 100644
--- a/be/src/exec/line_reader.h
+++ b/regression-test/suites/types/complex_types/basic_agg_test.groovy
@@ -15,19 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-#pragma once
+def tables=["bitmap_basic_agg","hll_basic_agg"]
-#include "common/status.h"
+for (String table in tables) {
+ sql """drop table if exists ${table};"""
+ sql new File("""regression-test/common/table/${table}.sql""").text
+ sql new File("""regression-test/common/load/${table}.sql""").text
+}
-namespace doris {
+qt_sql_bitmap """select * from bitmap_basic_agg;"""
-// This class is used for CSV scanner, to read content line by line
-class LineReader {
-public:
- virtual ~LineReader() {}
- virtual Status read_line(const uint8_t** ptr, size_t* size, bool* eof) = 0;
-
- virtual void close() = 0;
-};
-
-} // namespace doris
+qt_sql_hll """select * from hll_basic_agg;"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]