This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new d846e4c6e98 branch-3.0: [fix](column_complex) wrong type of Field
returned by ColumnComplex (#43718)
d846e4c6e98 is described below
commit d846e4c6e9842420c756a4bea1f151ce84f76bc3
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 13 15:36:33 2024 +0800
branch-3.0: [fix](column_complex) wrong type of Field returned by
ColumnComplex (#43718)
Cherry-picked from #43515
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/exec/es/es_scroll_parser.cpp | 2 +-
be/src/vec/columns/column_complex.h | 5 +-
be/src/vec/columns/column_fixed_length_object.h | 6 +-
be/src/vec/columns/column_string.h | 4 +-
be/src/vec/common/schema_util.cpp | 2 +-
be/src/vec/core/field.cpp | 2 +-
be/src/vec/core/field.h | 56 ++------------
.../vec/data_types/data_type_fixed_length_object.h | 2 +-
be/src/vec/data_types/data_type_jsonb.h | 2 +-
be/src/vec/data_types/data_type_object.h | 4 +-
be/src/vec/data_types/data_type_string.cpp | 2 +-
be/src/vec/data_types/data_type_string.h | 2 +-
.../data_types/serde/data_type_object_serde.cpp | 5 +-
be/src/vec/json/parse2column.cpp | 2 +-
.../compaction/index_compaction_test.cpp | 4 +-
.../index_compaction_with_deleted_term.cpp | 4 +-
.../aggregate_functions/agg_min_max_by_test.cpp | 2 +-
be/test/vec/columns/column_hash_func_test.cpp | 2 +-
be/test/vec/columns/column_nullable_test.h | 2 +-
be/test/vec/core/column_complex_test.cpp | 87 ++++++++++++++++++++--
be/test/vec/core/field_test.cpp | 2 +-
.../data_types/serde/data_type_serde_pb_test.cpp | 6 +-
.../data_types/serde/data_type_to_string_test.cpp | 10 +--
.../vec/function/function_array_element_test.cpp | 2 +-
be/test/vec/function/function_array_index_test.cpp | 4 +-
be/test/vec/function/function_array_size_test.cpp | 12 +--
.../vec/function/function_arrays_overlap_test.cpp | 6 +-
.../function_compressed_materialization_test.cpp | 4 +-
be/test/vec/function/function_string_test.cpp | 10 +--
be/test/vec/function/table_function_test.cpp | 4 +-
be/test/vec/jsonb/serialize_test.cpp | 2 +-
.../data/datatype_p0/complex_types/test_map.out | 3 +
.../datatype_p0/complex_types/test_map.groovy | 35 +++++++++
33 files changed, 188 insertions(+), 109 deletions(-)
diff --git a/be/src/exec/es/es_scroll_parser.cpp
b/be/src/exec/es/es_scroll_parser.cpp
index f8dfbd0d85e..f745ac34e65 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -488,7 +488,7 @@ Status process_single_column(const rapidjson::Value& col,
PrimitiveType sub_type
bool pure_doc_value, vectorized::Array& array) {
T val;
RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
- array.push_back(val);
+ array.push_back(vectorized::Field(val));
return Status::OK();
}
diff --git a/be/src/vec/columns/column_complex.h
b/be/src/vec/columns/column_complex.h
index feeb8f71b9d..31aa0bc12eb 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -20,6 +20,8 @@
#pragma once
+#include <glog/logging.h>
+
#include <vector>
#include "olap/hll.h"
@@ -129,13 +131,14 @@ public:
MutableColumnPtr clone_resized(size_t size) const override;
void insert(const Field& x) override {
+ DCHECK_EQ(x.get_type(), Field::TypeToEnum<T>::value);
const T& s = doris::vectorized::get<const T&>(x);
data.push_back(s);
}
Field operator[](size_t n) const override {
assert(n < size());
- return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])};
+ return Field(data[n]);
}
void get(size_t n, Field& res) const override {
diff --git a/be/src/vec/columns/column_fixed_length_object.h
b/be/src/vec/columns/column_fixed_length_object.h
index b83f11ff98a..1f92816ba04 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -105,11 +105,13 @@ public:
}
Field operator[](size_t n) const override {
- return {_data.data() + n * _item_size, _item_size};
+ return Field(
+ String(reinterpret_cast<const char*>(_data.data() + n *
_item_size), _item_size));
}
void get(size_t n, Field& res) const override {
- res.assign_string(_data.data() + n * _item_size, _item_size);
+ res = Field(
+ String(reinterpret_cast<const char*>(_data.data() + n *
_item_size), _item_size));
}
StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index b441b81613b..8a073ef08cb 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -122,7 +122,7 @@ public:
Field operator[](size_t n) const override {
assert(n < size());
- return Field(&chars[offset_at(n)], size_at(n));
+ return Field(String(reinterpret_cast<const
char*>(&chars[offset_at(n)]), size_at(n)));
}
void get(size_t n, Field& res) const override {
@@ -132,7 +132,7 @@ public:
res = JsonbField(reinterpret_cast<const
char*>(&chars[offset_at(n)]), size_at(n));
return;
}
- res.assign_string(&chars[offset_at(n)], size_at(n));
+ res = Field(String(reinterpret_cast<const
char*>(&chars[offset_at(n)]), size_at(n)));
}
StringRef get_data_at(size_t n) const override {
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index 4545a383910..fd50af3e1fc 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -578,7 +578,7 @@ Status extract(ColumnPtr source, const PathInData& path,
MutableColumnPtr& dst)
: std::make_shared<DataTypeJsonb>();
ColumnsWithTypeAndName arguments {
{source, json_type, ""},
- {type_string->create_column_const(1, Field(jsonpath.data(),
jsonpath.size())),
+ {type_string->create_column_const(1, Field(String(jsonpath.data(),
jsonpath.size()))),
type_string, ""}};
auto function =
SimpleFunctionFactory::instance().get_function("jsonb_extract",
arguments, json_type);
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index 8cb07f27c7c..e652fc2dc9e 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -74,7 +74,7 @@ void read_binary(Array& x, BufferReadable& buf) {
case Field::Types::String: {
std::string value;
doris::vectorized::read_string_binary(value, buf);
- x.push_back(value);
+ x.push_back(Field(value));
break;
}
case Field::Types::JSONB: {
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 87459f19ce6..8113dc602fb 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -452,43 +452,20 @@ public:
Field(Field&& rhs) { create(std::move(rhs)); }
+ // Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type.
+ // Other types don't require explicit construction to avoid extensive modifications.
template <typename T>
requires(!std::is_same_v<std::decay_t<T>, Field>)
- Field(T&& rhs);
-
- /// Create a string inplace.
- Field(const char* data, size_t size) { create(data, size); }
-
- Field(const unsigned char* data, size_t size) { create(data, size); }
-
- /// NOTE In case when field already has string type, more direct assign is
possible.
- void assign_string(const char* data, size_t size) {
- destroy();
- create(data, size);
- }
-
- void assign_string(const unsigned char* data, size_t size) {
- destroy();
- create(data, size);
- }
-
- void assign_jsonb(const char* data, size_t size) {
- destroy();
- create_jsonb(data, size);
- }
-
- void assign_jsonb(const unsigned char* data, size_t size) {
- destroy();
- create_jsonb(data, size);
- }
+ explicit(std::is_same_v<std::decay_t<T>, String>) Field(T&& rhs);
Field& operator=(const Field& rhs) {
if (this != &rhs) {
if (which != rhs.which) {
destroy();
create(rhs);
- } else
+ } else {
assign(rhs); /// This assigns string or vector without
deallocation of existing buffer.
+ }
}
return *this;
}
@@ -503,8 +480,9 @@ public:
if (which != rhs.which) {
destroy();
create(std::move(rhs));
- } else
+ } else {
assign(std::move(rhs));
+ }
}
return *this;
}
@@ -731,7 +709,6 @@ private:
*ptr = std::forward<T>(x);
}
-private:
void create(const Field& x) {
dispatch([this](auto& value) { create_concrete(value); }, x);
}
@@ -748,25 +725,6 @@ private:
dispatch([this](auto& value) { assign_concrete(std::move(value)); },
x);
}
- void create(const char* data, size_t size) {
- new (&storage) String(data, size);
- which = Types::String;
- }
-
- void create(const unsigned char* data, size_t size) {
- create(reinterpret_cast<const char*>(data), size);
- }
-
- void create_jsonb(const char* data, size_t size) {
- new (&storage) JsonbField(data, size);
- which = Types::JSONB;
- }
-
- void create_jsonb(const unsigned char* data, size_t size) {
- new (&storage) JsonbField(reinterpret_cast<const char*>(data), size);
- which = Types::JSONB;
- }
-
ALWAYS_INLINE void destroy() {
if (which < Types::MIN_NON_POD) {
return;
diff --git a/be/src/vec/data_types/data_type_fixed_length_object.h
b/be/src/vec/data_types/data_type_fixed_length_object.h
index cc3a74429d7..af923ddce18 100644
--- a/be/src/vec/data_types/data_type_fixed_length_object.h
+++ b/be/src/vec/data_types/data_type_fixed_length_object.h
@@ -60,7 +60,7 @@ public:
return doris::FieldType::OLAP_FIELD_TYPE_NONE;
}
- Field get_default() const override { return String(); }
+ Field get_default() const override { return Field(String()); }
[[noreturn]] Field get_field(const TExprNode& node) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
diff --git a/be/src/vec/data_types/data_type_jsonb.h
b/be/src/vec/data_types/data_type_jsonb.h
index 0577e5ed449..3d681e3ce79 100644
--- a/be/src/vec/data_types/data_type_jsonb.h
+++ b/be/src/vec/data_types/data_type_jsonb.h
@@ -78,7 +78,7 @@ public:
DCHECK_EQ(node.node_type, TExprNodeType::JSON_LITERAL);
DCHECK(node.__isset.json_literal);
JsonBinaryValue value(node.json_literal.value);
- return String(value.value(), value.size());
+ return Field(String(value.value(), value.size()));
}
bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/data_types/data_type_object.h
b/be/src/vec/data_types/data_type_object.h
index 2959b3dc074..ec60cde9f92 100644
--- a/be/src/vec/data_types/data_type_object.h
+++ b/be/src/vec/data_types/data_type_object.h
@@ -81,10 +81,10 @@ public:
Field get_field(const TExprNode& node) const override {
if (node.__isset.string_literal) {
- return node.string_literal.value;
+ return Field(node.string_literal.value);
}
if (node.node_type == TExprNodeType::NULL_LITERAL) {
- return Field();
+ return {};
}
std::stringstream error_string;
node.printTo(error_string);
diff --git a/be/src/vec/data_types/data_type_string.cpp
b/be/src/vec/data_types/data_type_string.cpp
index d2c2ae2c0b0..4ed51a00128 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -64,7 +64,7 @@ Status DataTypeString::from_string(ReadBuffer& rb, IColumn*
column) const {
}
Field DataTypeString::get_default() const {
- return String();
+ return Field(String());
}
MutableColumnPtr DataTypeString::create_column() const {
diff --git a/be/src/vec/data_types/data_type_string.h
b/be/src/vec/data_types/data_type_string.h
index abac6bc4b04..dd937168611 100644
--- a/be/src/vec/data_types/data_type_string.h
+++ b/be/src/vec/data_types/data_type_string.h
@@ -75,7 +75,7 @@ public:
Field get_field(const TExprNode& node) const override {
DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
DCHECK(node.__isset.string_literal);
- return node.string_literal.value;
+ return Field(node.string_literal.value);
}
bool equals(const IDataType& rhs) const override;
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp
b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index 530646e9b4d..383add39354 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -26,6 +26,7 @@
#include "vec/common/assert_cast.h"
#include "vec/common/schema_util.h"
#include "vec/core/field.h"
+#include "vec/core/types.h"
#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
@@ -117,11 +118,11 @@ void
DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV
Field field;
if (arg->isBinary()) {
const auto* blob = static_cast<const JsonbBlobVal*>(arg);
- field.assign_jsonb(blob->getBlob(), blob->getBlobLen());
+ field = JsonbField(blob->getBlob(), blob->getBlobLen());
} else if (arg->isString()) {
// not a valid jsonb type, insert as string
const auto* str = static_cast<const JsonbStringVal*>(arg);
- field.assign_string(str->getBlob(), str->getBlobLen());
+ field = Field(String(str->getBlob(), str->getBlobLen()));
} else {
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb
type");
}
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index aa5fc5eb8ed..ba18083a95c 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -149,7 +149,7 @@ void parse_json_to_variant(IColumn& column, const char*
src, size_t length,
}
// Treat as string
PathInData root_path;
- Field field(src, length);
+ Field field(String(src, length));
result = ParseResult {{root_path}, {field}};
}
auto& [paths, values] = *result;
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
index 5e3370847e9..aed83201a63 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
@@ -289,8 +289,8 @@ TEST_F(IndexCompactionTest, write_index_test) {
auto columns = block.mutate_columns();
for (const auto& row : data[i]) {
vectorized::Field key = Int32(row.key);
- vectorized::Field v1 = row.word;
- vectorized::Field v2 = row.url;
+ vectorized::Field v1(row.word);
+ vectorized::Field v2(row.url);
vectorized::Field v3 = Int32(row.num);
columns[0]->insert(key);
columns[1]->insert(v1);
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
index 321d43fa872..8b5d403fca4 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
@@ -582,8 +582,8 @@ TEST_F(IndexCompactionDeleteTest, delete_index_test) {
auto columns = block.mutate_columns();
for (const auto& row : data[i]) {
vectorized::Field key = Int32(row.key);
- vectorized::Field v1 = row.word;
- vectorized::Field v2 = row.url;
+ vectorized::Field v1(row.word);
+ vectorized::Field v2(row.url);
vectorized::Field v3 = Int32(row.num);
columns[0]->insert(key);
columns[1]->insert(v1);
diff --git a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
index 137f4fc70b1..b1a3e9ed483 100644
--- a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
+++ b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
@@ -71,7 +71,7 @@ TEST_P(AggMinMaxByTest, min_max_by_test) {
min_pair.first = str_val;
min_pair.second = i;
}
- column_vector_key_str->insert(cast_to_nearest_field_type(str_val));
+
column_vector_key_str->insert(Field(cast_to_nearest_field_type(str_val)));
}
// Prepare test function and parameters.
diff --git a/be/test/vec/columns/column_hash_func_test.cpp
b/be/test/vec/columns/column_hash_func_test.cpp
index 7b2d5f2dddd..c49f1e0a578 100644
--- a/be/test/vec/columns/column_hash_func_test.cpp
+++ b/be/test/vec/columns/column_hash_func_test.cpp
@@ -242,7 +242,7 @@ TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) {
Tuple t;
t.push_back(Int64(1));
- t.push_back(String("hello"));
+ t.push_back(Field(String("hello")));
DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes);
std::cout << a->get_name() << std::endl;
diff --git a/be/test/vec/columns/column_nullable_test.h
b/be/test/vec/columns/column_nullable_test.h
index 0f90a25c9b5..f371ff13fb2 100644
--- a/be/test/vec/columns/column_nullable_test.h
+++ b/be/test/vec/columns/column_nullable_test.h
@@ -83,7 +83,7 @@ inline MutableColumnPtr create_nested_column(size_t
input_rows_count) {
if constexpr (std::is_integral_v<T>) {
column->insert(rand() % std::numeric_limits<T>::max());
} else if constexpr (std::is_same_v<T, String>) {
- column->insert(generate_random_string(rand() % 512));
+ column->insert(Field(generate_random_string(rand() % 512)));
} else if constexpr (std::is_same_v<T, Decimal64>) {
column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
} else {
diff --git a/be/test/vec/core/column_complex_test.cpp
b/be/test/vec/core/column_complex_test.cpp
index 589a705e072..a0fbcccdd15 100644
--- a/be/test/vec/core/column_complex_test.cpp
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -17,8 +17,10 @@
#include "vec/columns/column_complex.h"
+#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
#include <stddef.h>
#include <memory>
@@ -26,6 +28,8 @@
#include "agent/be_exec_version_manager.h"
#include "gtest/gtest_pred_impl.h"
+#include "util/bitmap_value.h"
+#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_quantilestate.h"
@@ -72,12 +76,12 @@ public:
}
void check_serialize_and_deserialize(MutableColumnPtr& col) {
- auto column = assert_cast<ColumnBitmap*>(col.get());
+ auto* column = assert_cast<ColumnBitmap*>(col.get());
auto size = _bitmap_type.get_uncompressed_serialized_bytes(
*column, BeExecVersionManager::get_newest_version());
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
- auto result = _bitmap_type.serialize(*column, buf.get(),
-
BeExecVersionManager::get_newest_version());
+ auto* result = _bitmap_type.serialize(*column, buf.get(),
+
BeExecVersionManager::get_newest_version());
ASSERT_EQ(result, buf.get() + size);
auto column2 = _bitmap_type.create_column();
@@ -85,6 +89,19 @@ public:
check_bitmap_column(*column, *column2.get());
}
+ void check_field_type(MutableColumnPtr& col) {
+ auto& column = assert_cast<ColumnBitmap&>(*col.get());
+ auto dst_column = ColumnBitmap::create();
+ const auto rows = column.size();
+ for (size_t i = 0; i != rows; ++i) {
+ auto field = column[i];
+ ASSERT_EQ(field.get_type(), Field::Types::Bitmap);
+ dst_column->insert(field);
+ }
+
+ check_bitmap_column(column, *dst_column);
+ }
+
private:
DataTypeBitMap _bitmap_type;
};
@@ -94,7 +111,7 @@ public:
virtual void SetUp() override {}
virtual void TearDown() override {}
- void check_bitmap_column(const IColumn& l, const IColumn& r) {
+ void check_quantile_state_column(const IColumn& l, const IColumn& r) {
ASSERT_EQ(l.size(), r.size());
const auto& l_col = assert_cast<const ColumnQuantileState&>(l);
const auto& r_col = assert_cast<const ColumnQuantileState&>(r);
@@ -117,7 +134,20 @@ public:
auto column2 = _quantile_state_type.create_column();
_quantile_state_type.deserialize(buf.get(), &column2,
BeExecVersionManager::get_newest_version());
- check_bitmap_column(*column, *column2.get());
+ check_quantile_state_column(*column, *column2.get());
+ }
+
+ void check_field_type(MutableColumnPtr& col) {
+ auto& column = assert_cast<ColumnQuantileState&>(*col.get());
+ auto dst_column = ColumnQuantileState::create();
+ const auto rows = column.size();
+ for (size_t i = 0; i != rows; ++i) {
+ auto field = column[i];
+ ASSERT_EQ(field.get_type(), Field::Types::QuantileState);
+ dst_column->insert(field);
+ }
+
+ check_quantile_state_column(column, *dst_column);
}
private:
@@ -153,6 +183,38 @@ TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) {
EXPECT_TRUE(bitmap.contains(1000000));
}
+TEST_F(ColumnBitmapTest, OperatorValidate) {
+ auto column = _bitmap_type.create_column();
+
+ // empty column
+ check_serialize_and_deserialize(column);
+
+ // bitmap with lots of rows
+ const size_t row_size = 128;
+ auto& data = assert_cast<ColumnBitmap&>(*column.get()).get_data();
+ data.reserve(row_size);
+
+ for (size_t i = 0; i != row_size; ++i) {
+ BitmapValue bitmap_value;
+ for (size_t j = 0; j <= i; ++j) {
+ bitmap_value.add(j);
+ }
+ data.emplace_back(std::move(bitmap_value));
+ }
+
+ auto& bitmap_column = assert_cast<ColumnBitmap&>(*column.get());
+ for (size_t i = 0; i != row_size; ++i) {
+ auto field = bitmap_column[i];
+ ASSERT_EQ(field.get_type(), Field::Types::Bitmap);
+ const auto& bitmap = vectorized::get<BitmapValue&>(field);
+
+ ASSERT_EQ(bitmap.cardinality(), i + 1);
+ for (size_t j = 0; j <= i; ++j) {
+ ASSERT_TRUE(bitmap.contains(j));
+ }
+ }
+}
+
TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) {
auto column = _quantile_state_type.create_column();
// empty column
@@ -180,4 +242,19 @@ TEST_F(ColumnQuantileStateTest,
ColumnQuantileStateReadWrite) {
check_serialize_and_deserialize(column);
}
+TEST_F(ColumnQuantileStateTest, OperatorValidate) {
+ auto column = _quantile_state_type.create_column();
+
+ // empty column
+ check_serialize_and_deserialize(column);
+
+ // quantile state column with lots of rows
+ const size_t row_size = 20000;
+ auto& data = assert_cast<ColumnQuantileState&>(*column.get()).get_data();
+ data.resize(row_size);
+ check_serialize_and_deserialize(column);
+
+ check_field_type(column);
+}
+
} // namespace doris::vectorized
diff --git a/be/test/vec/core/field_test.cpp b/be/test/vec/core/field_test.cpp
index a3542735c50..71d26ea4979 100644
--- a/be/test/vec/core/field_test.cpp
+++ b/be/test/vec/core/field_test.cpp
@@ -39,7 +39,7 @@ TEST(VFieldTest, field_string) {
ASSERT_EQ(f.get<String>(), "Hello, world (4)");
f = Array {Field {String {"Hello, world (5)"}}};
ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (5)");
- f = Array {String {"Hello, world (6)"}};
+ f = Array {Field(String {"Hello, world (6)"})};
ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (6)");
}
diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
index b64ddee1d2c..852614e84c5 100644
--- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp
@@ -583,10 +583,10 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct) {
DataTypePtr m =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr>
{s, d, m});
Tuple t1, t2;
- t1.push_back(String("amory cute"));
+ t1.push_back(Field(String("amory cute")));
t1.push_back(__int128_t(37));
t1.push_back(true);
- t2.push_back("null");
+ t2.push_back(Field("null"));
t2.push_back(__int128_t(26));
t2.push_back(false);
MutableColumnPtr struct_column = st->create_column();
@@ -614,7 +614,7 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct2) {
DataTypePtr m =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
DataTypePtr st = std::make_shared<DataTypeStruct>(std::vector<DataTypePtr>
{s, d, m});
Tuple t1, t2;
- t1.push_back(String("amory cute"));
+ t1.push_back(Field(String("amory cute")));
t1.push_back(37);
t1.push_back(true);
t2.push_back("null");
diff --git a/be/test/vec/data_types/serde/data_type_to_string_test.cpp
b/be/test/vec/data_types/serde/data_type_to_string_test.cpp
index fe2e05d10a1..d605e73ced3 100644
--- a/be/test/vec/data_types/serde/data_type_to_string_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_to_string_test.cpp
@@ -45,9 +45,9 @@ TEST(ToStringMethodTest, DataTypeToStringTest) {
a1.push_back(Null());
a1.push_back(UInt64(12345678));
a1.push_back(UInt64(0));
- a2.push_back(String("hello amory"));
- a2.push_back("NULL");
- a2.push_back(String("cute amory"));
+ a2.push_back(Field(String("hello amory")));
+ a2.push_back(Field("NULL"));
+ a2.push_back(Field(String("cute amory")));
a2.push_back(Null());
Map m;
m.push_back(a1);
@@ -55,11 +55,11 @@ TEST(ToStringMethodTest, DataTypeToStringTest) {
Tuple t;
t.push_back(Int128(12345454342));
- t.push_back(String("amory cute"));
+ t.push_back(Field(String("amory cute")));
t.push_back(UInt64(0));
cases.field_values = {UInt64(12),
- String(" hello amory , cute amory "),
+ Field(String(" hello amory , cute amory ")),
DecimalField<Decimal32>(-12345678, 0),
a1,
a2,
diff --git a/be/test/vec/function/function_array_element_test.cpp
b/be/test/vec/function/function_array_element_test.cpp
index 16ce28f5259..bf25ea4386c 100644
--- a/be/test/vec/function/function_array_element_test.cpp
+++ b/be/test/vec/function/function_array_element_test.cpp
@@ -148,7 +148,7 @@ TEST(function_array_element_test, element_at) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String,
TypeIndex::Int32};
- Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+ Array vec = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec, 1}, std::string("abc")},
{{vec, 2}, std::string("")},
{{vec, 10}, Null()},
diff --git a/be/test/vec/function/function_array_index_test.cpp
b/be/test/vec/function/function_array_index_test.cpp
index 24bd5797869..1a037818b10 100644
--- a/be/test/vec/function/function_array_index_test.cpp
+++ b/be/test/vec/function/function_array_index_test.cpp
@@ -152,7 +152,7 @@ TEST(function_array_index_test, array_contains) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String,
TypeIndex::String};
- Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+ Array vec = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec, std::string("abc")}, UInt8(1)},
{{vec, std::string("aaa")}, UInt8(0)},
{{vec, std::string("")}, UInt8(1)},
@@ -252,7 +252,7 @@ TEST(function_array_index_test, array_position) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String,
TypeIndex::String};
- Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)};
+ Array vec = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec, std::string("abc")}, Int64(1)},
{{vec, std::string("aaa")}, Int64(0)},
{{vec, std::string("")}, Int64(2)},
diff --git a/be/test/vec/function/function_array_size_test.cpp
b/be/test/vec/function/function_array_size_test.cpp
index 3fa710f6844..c853a56930d 100644
--- a/be/test/vec/function/function_array_size_test.cpp
+++ b/be/test/vec/function/function_array_size_test.cpp
@@ -47,8 +47,8 @@ TEST(function_array_size_test, size) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
- Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
- Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+ Array vec1 = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
+ Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec1}, Int64(3)},
{{vec2}, Int64(3)},
{{Null()}, Null()},
@@ -76,8 +76,8 @@ TEST(function_array_size_test, cardinality) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
- Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
- Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+ Array vec1 = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
+ Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec1}, Int64(3)},
{{vec2}, Int64(3)},
{{Null()}, Null()},
@@ -105,8 +105,8 @@ TEST(function_array_size_test, array_size) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
- Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
- Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)};
+ Array vec1 = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
+ Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)),
Field(String("def", 3))};
DataSet data_set = {{{vec1}, Int64(3)},
{{vec2}, Int64(3)},
{{Null()}, Null()},
diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp
b/be/test/vec/function/function_arrays_overlap_test.cpp
index 4a13d41b0a3..3297f5fc281 100644
--- a/be/test/vec/function/function_arrays_overlap_test.cpp
+++ b/be/test/vec/function/function_arrays_overlap_test.cpp
@@ -124,9 +124,9 @@ TEST(function_arrays_overlap_test, arrays_overlap) {
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String,
TypeIndex::Array,
TypeIndex::String};
- Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
- Array vec2 = {Field("abc", 3)};
- Array vec3 = {Field("", 0)};
+ Array vec1 = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
+ Array vec2 = {Field(String("abc", 3))};
+ Array vec3 = {Field(String("", 0))};
DataSet data_set = {{{vec1, vec2}, UInt8(1)},
{{vec1, vec3}, UInt8(1)},
{{Null(), vec1}, Null()},
diff --git a/be/test/vec/function/function_compressed_materialization_test.cpp
b/be/test/vec/function/function_compressed_materialization_test.cpp
index 2553fc82fc7..432fbf78529 100644
--- a/be/test/vec/function/function_compressed_materialization_test.cpp
+++ b/be/test/vec/function/function_compressed_materialization_test.cpp
@@ -111,7 +111,7 @@ void encode_and_decode(size_t len_of_varchar, std::string
function_name) {
continue;
} else {
std::string random_bytes =
generate_random_len_and_random_bytes(m);
- col_source_str_mutate->insert(Field(random_bytes.c_str(),
random_bytes.size()));
+ col_source_str_mutate->insert(Field(random_bytes));
}
}
@@ -185,7 +185,7 @@ TEST(CompressedMaterializationTest, abnormal_test) {
for (size_t i = 0; i < input_rows_count; ++i) {
std::string random_bytes = generate_random_bytes(16);
- col_source_str_mutate->insert(Field(random_bytes.c_str(),
random_bytes.size()));
+ col_source_str_mutate->insert(Field(random_bytes));
}
auto col_source_str = std::move(col_source_str_mutate);
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index 5d1d6fb9d8b..f4381505276 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -1417,11 +1417,11 @@ TEST(function_string_test, function_concat_ws_test) {
{
BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::Array,
TypeIndex::String};
- Array vec1 = {Field("", 0), Field("", 0), Field("", 0)};
- Array vec2 = {Field("123", 3), Field("456", 3), Field("789", 3)};
- Array vec3 = {Field("", 0), Field("?", 1), Field("", 0)};
- Array vec4 = {Field("abc", 3), Field("", 0), Field("def", 3)};
- Array vec5 = {Field("abc", 3), Field("def", 3), Field("ghi", 3)};
+ Array vec1 = {Field(String("", 0)), Field(String("", 0)),
Field(String("", 0))};
+ Array vec2 = {Field(String("123", 3)), Field(String("456", 3)),
Field(String("789", 3))};
+ Array vec3 = {Field(String("", 0)), Field(String("?", 1)),
Field(String("", 0))};
+ Array vec4 = {Field(String("abc", 3)), Field(String("", 0)),
Field(String("def", 3))};
+ Array vec5 = {Field(String("abc", 3)), Field(String("def", 3)),
Field(String("ghi", 3))};
DataSet data_set = {{{std::string("-"), vec1}, std::string("--")},
{{std::string(""), vec2},
std::string("123456789")},
{{std::string("-"), vec3}, std::string("-?-")},
diff --git a/be/test/vec/function/table_function_test.cpp
b/be/test/vec/function/table_function_test.cpp
index a5c49dbdba9..43d37f6bf73 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -97,7 +97,7 @@ TEST_F(TableFunctionTest, vexplode_outer) {
// explode_outer(Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
- Array vec = {std::string("abc"), std::string(""), std::string("def")};
+ Array vec = {Field(std::string("abc")), Field(std::string("")),
Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::String};
@@ -144,7 +144,7 @@ TEST_F(TableFunctionTest, vexplode) {
// explode(Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
- Array vec = {std::string("abc"), std::string(""), std::string("def")};
+ Array vec = {Field(std::string("abc")), Field(std::string("")),
Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::String};
diff --git a/be/test/vec/jsonb/serialize_test.cpp
b/be/test/vec/jsonb/serialize_test.cpp
index 3845c689e1e..86244c6ca34 100644
--- a/be/test/vec/jsonb/serialize_test.cpp
+++ b/be/test/vec/jsonb/serialize_test.cpp
@@ -277,7 +277,7 @@ TEST(BlockSerializeTest, Struct) {
DataTypePtr m =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
DataTypePtr st =
std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
Tuple t1, t2;
- t1.push_back(String("amory cute"));
+ t1.push_back(Field(String("amory cute")));
t1.push_back(__int128_t(37));
t1.push_back(true);
t2.push_back("null");
diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out
b/regression-test/data/datatype_p0/complex_types/test_map.out
index 4ac971fb3a1..03c9853b8e8 100644
--- a/regression-test/data/datatype_p0/complex_types/test_map.out
+++ b/regression-test/data/datatype_p0/complex_types/test_map.out
@@ -14,3 +14,6 @@
6 3 {"key3":"value3", "key33":"value33", "key3333":"value333"}
6 3
7 4 {"key4":"value4", "key44":"value44", "key444":"value444",
"key4444":"value4444"} \N \N
+-- !sql2 --
+3 true true true
+
diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy
b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
index 4dd0272f517..b985ef61008 100644
--- a/regression-test/suites/datatype_p0/complex_types/test_map.groovy
+++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
@@ -51,4 +51,39 @@ suite("test_map") {
qt_sql """
select * from test_map_table left join test_map_table_right on
test_map_table.k1 = test_map_table_right.value order by 1,2,4,5;
"""
+
+ sql "DROP TABLE IF EXISTS `task_map_agg_with_bitmap`"
+ sql """
+ CREATE TABLE `task_map_agg_with_bitmap` (
+ `cache_key` varchar(65533) NOT NULL,
+ `result_cnt` int NULL COMMENT '人群包人数'
+ ) ENGINE = OLAP duplicate KEY(`cache_key`) COMMENT 'OLAP' DISTRIBUTED
BY HASH(`cache_key`) BUCKETS 1 PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql 'insert into `task_map_agg_with_bitmap` values ("aa",null);'
+ sql 'insert into `task_map_agg_with_bitmap` values ("bb",null);'
+ sql 'insert into `task_map_agg_with_bitmap` values ("bb",1);'
+ sql 'insert into `task_map_agg_with_bitmap` values ("bb",2);'
+ sql 'insert into `task_map_agg_with_bitmap` values ("bb",3);'
+
+ qt_sql2 """
+ select bitmap_count(id_map['2024-11-03']) cnt,
+ bitmap_contains(id_map['2024-11-03'], 1) c1,
+ bitmap_contains(id_map['2024-11-03'], 2) c2,
+ bitmap_contains(id_map['2024-11-03'], 3) c3
+ from (
+ select
+ map_agg(tag_logymd, result) id_map
+ from
+ (
+ select
+ '2024-11-03' tag_logymd,
+ bitmap_agg(result_cnt) result
+ from
+ `task_map_agg_with_bitmap`
+ ) t1
+ ) t2;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]