This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new aa9bdd76d0e [Pick](Variant) pick some fix #38413 #38364 (#38512)
aa9bdd76d0e is described below
commit aa9bdd76d0e3fae013bdcc3a8b7b640a6ae6bd02
Author: lihangyu <[email protected]>
AuthorDate: Wed Jul 31 11:03:31 2024 +0800
[Pick](Variant) pick some fix #38413 #38364 (#38512)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 5 ++
be/src/vec/columns/column_object.cpp | 61 +++++++++++++++++++++-
be/src/vec/columns/column_object.h | 2 +
regression-test/data/variant_p0/rqg/rqg4.out | 16 ++++++
regression-test/suites/variant_p0/rqg/rqg4.sql | 6 +++
5 files changed, 88 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 9040cbf3e27..21998060716 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -687,6 +687,11 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) {
RowRanges dict_row_ranges = RowRanges::create_single(num_rows());
for (auto cid : cids) {
+ if (!_segment->can_apply_predicate_safely(cid,
+
_opts.col_id_to_predicates.at(cid).get(),
+ *_schema,
_opts.io_ctx.reader_type)) {
+ continue;
+ }
RowRanges tmp_row_ranges =
RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_dict(
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index dc09039222a..a03e8635c4d 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -512,6 +512,7 @@ MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&&
func) const {
res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
subcolumn->data.get_least_common_type());
}
+ check_consistency();
return res;
}
ColumnPtr ColumnObject::index(const IColumn& indexes, size_t limit) const {
@@ -519,6 +520,20 @@ ColumnPtr ColumnObject::index(const IColumn& indexes,
size_t limit) const {
[&](const auto& subcolumn) { return subcolumn.index(indexes,
limit); });
}
+void ColumnObject::resize(size_t n) {
+    // Bring the object to exactly `n` rows: pad with defaults when growing,
+    // trim the tail of every subcolumn when shrinking. Equal size is a no-op.
+    if (n > num_rows) {
+        insert_many_defaults(n - num_rows);
+    } else if (n < num_rows) {
+        const size_t rows_to_drop = num_rows - n;
+        for (auto& entry : subcolumns) {
+            entry->data.pop_back(rows_to_drop);
+        }
+    }
+    // Record the final row count explicitly, whichever branch ran.
+    num_rows = n;
+}
+
bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {
if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) {
return false;
@@ -705,8 +720,16 @@ MutableColumnPtr ColumnObject::clone_resized(size_t
new_size) const {
if (new_size == 0) {
return ColumnObject::create(is_nullable);
}
- return apply_for_subcolumns(
+ // If subcolumns are empty, then res will be empty but new_size > 0
+ if (subcolumns.empty()) {
+ // Add an empty column with new_size rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(new_size);
+ return res;
+ }
+ auto res = apply_for_subcolumns(
[&](const auto& subcolumn) { return
subcolumn.clone_resized(new_size); });
+ return res;
}
size_t ColumnObject::byte_size() const {
@@ -846,7 +869,10 @@ Field ColumnObject::operator[](size_t n) const {
}
void ColumnObject::get(size_t n, Field& res) const {
- assert(n < size());
+ if (UNLIKELY(n >= size())) {
+ throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+ "Index ({}) for getting field is out of range",
n);
+ }
res = VariantMap();
auto& object = res.get<VariantMap&>();
@@ -894,11 +920,32 @@ void ColumnObject::insert_range_from(const IColumn& src,
size_t start, size_t le
}
ColumnPtr ColumnObject::replicate(const Offsets& offsets) const {
+    // Replicates each row according to `offsets` and returns the new column.
+    if (subcolumns.empty()) {
+        // No subcolumns to replicate: the result is an empty object that only
+        // carries the row count, which is the last cumulative offset (or 0 when
+        // `offsets` itself is empty, where calling back() would be invalid).
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(offsets.empty() ? 0 : offsets.back());
+        // Return here; otherwise `res` is discarded and control falls through
+        // to apply_for_subcolumns().
+        return res;
+    }
    return apply_for_subcolumns(
            [&](const auto& subcolumn) { return subcolumn.replicate(offsets); });
}
ColumnPtr ColumnObject::permute(const Permutation& perm, size_t limit) const {
+    if (subcolumns.empty()) {
+        // A limit of 0 selects every row; any other limit is capped at the row count.
+        const size_t result_rows = (limit == 0) ? num_rows : std::min(num_rows, limit);
+
+        if (perm.size() < result_rows) {
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+                                   "Size of permutation is less than required.");
+        }
+        // Nothing to permute: return an empty object holding result_rows rows.
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(result_rows);
+        return res;
+    }
    return apply_for_subcolumns(
            [&](const auto& subcolumn) { return subcolumn.permute(perm, limit); });
}
@@ -1428,6 +1475,12 @@ ColumnPtr ColumnObject::filter(const Filter& filter,
ssize_t count) const {
return finalized_object.apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.filter(filter,
count); });
}
+ if (subcolumns.empty()) {
+ // Add an empty column with filtered rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(count_bytes_in_filter(filter));
+ return res;
+ }
auto new_column = ColumnObject::create(true, false);
for (auto& entry : subcolumns) {
auto subcolumn = entry->data.get_finalized_column().filter(filter,
count);
@@ -1441,6 +1494,10 @@ Status ColumnObject::filter_by_selector(const uint16_t*
sel, size_t sel_size, IC
if (!is_finalized()) {
finalize();
}
+ if (subcolumns.empty()) {
+ assert_cast<ColumnObject*>(col_ptr)->insert_many_defaults(sel_size);
+ return Status::OK();
+ }
auto* res = assert_cast<ColumnObject*>(col_ptr);
for (const auto& subcolumn : subcolumns) {
auto new_subcolumn =
subcolumn->data.get_least_common_type()->create_column();
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 657889b2de3..2369b407658 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -363,6 +363,8 @@ public:
void clear() override;
+ void resize(size_t n) override;
+
void clear_subcolumns_data();
std::string get_name() const override {
diff --git a/regression-test/data/variant_p0/rqg/rqg4.out
b/regression-test/data/variant_p0/rqg/rqg4.out
new file mode 100644
index 00000000000..ec975af29e3
--- /dev/null
+++ b/regression-test/data/variant_p0/rqg/rqg4.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !rqg4 --
+0
+
+-- !rqg4_2 --
+500
+
+-- !rqg4_3 --
+500
+
+-- !rqg4_4 --
+70
+
+-- !rqg4_5 --
+70
+
diff --git a/regression-test/suites/variant_p0/rqg/rqg4.sql
b/regression-test/suites/variant_p0/rqg/rqg4.sql
new file mode 100644
index 00000000000..775bb2a576a
--- /dev/null
+++ b/regression-test/suites/variant_p0/rqg/rqg4.sql
@@ -0,0 +1,6 @@
+CREATE TABLE table_500_undef_partitions2_keys3_properties4_distributed_by52 (
pk int, var VARIANT NULL ) engine=olap DUPLICATE KEY(pk) distributed by
hash(pk) buckets 10 properties("replication_num" = "1");
+INSERT INTO
table_500_undef_partitions2_keys3_properties4_distributed_by52(pk,var) VALUES
('0','{\"col_int_undef_signed\": 1, \"col_int_undef_signed2\": 2,
\"col_date_undef_signed\": \"2025-06-18\", \"col_date_undef_signed2\":
\"2024-02-18\", \"col_varchar_10__undef_signed\": \"i\",
\"col_varchar_1024__undef_signed\": \"i\"}'),('1','{\"col_int_undef_signed\":
0, \"col_int_undef_signed2\": 5, \"col_date_undef_signed\": \"2026-02-18\",
\"col_date_undef_signed2\": \"2023-12-19\", \"col_varc [...]
+INSERT INTO
table_500_undef_partitions2_keys3_properties4_distributed_by52(pk,var) VALUES
('0','{\"col_int_undef_signed\": 6, \"col_int_undef_signed2\": 7,
\"col_date_undef_signed\": \"2023-12-17\", \"col_date_undef_signed2\":
\"2023-12-20\", \"col_varchar_10__undef_signed\": \"j\",
\"col_varchar_1024__undef_signed\": \"u\"}'),('1','{\"col_int_undef_signed\":
1, \"col_int_undef_signed2\": 3, \"col_date_undef_signed\": \"2025-02-18\",
\"col_date_undef_signed2\": \"2023-12-16\", \"col_varc [...]
+
+SELECT COUNT() AS field1 FROM
table_500_undef_partitions2_keys3_properties4_distributed_by52 AS table1 WHERE
( CAST(table1 . var['col_date_undef_signed'] AS text) IN ( '2027-01-16',
'2023-12-15', '2023-12-15' ));
+SELECT COUNT() AS field1 FROM
table_500_undef_partitions2_keys3_properties4_distributed_by52 AS table1 WHERE
( CAST(table1 . var['col_date_undef_signed'] AS datetime) IN ( '2027-01-16',
'2023-12-15', '2023-12-15' ));
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]