This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 4ffd4a6d263 [Fix](SchemaChange) refactor variant root column iterator
to make row read correct (#41942)
4ffd4a6d263 is described below
commit 4ffd4a6d263c4d7b3e8e0dd99d5f8e6b601961e2
Author: lihangyu <[email protected]>
AuthorDate: Thu Oct 17 09:41:17 2024 +0800
[Fix](SchemaChange) refactor variant root column iterator to make row read
correct (#41942)
pick (#41700)
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 85 ++++++++++++-------------
be/src/olap/rowset/segment_v2/column_reader.h | 3 +
be/src/vec/columns/column_object.cpp | 6 ++
be/src/vec/columns/column_object.h | 2 +
4 files changed, 52 insertions(+), 44 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index e970250a973..e2e6e93f602 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1581,28 +1581,15 @@ void
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
}
}
-Status VariantRootColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
- bool* has_null) {
- size_t size = dst->size();
+Status VariantRootColumnIterator::_process_root_column(
+ vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr&
root_column,
+ const vectorized::DataTypePtr& most_common_type) {
auto& obj =
dst->is_nullable()
? assert_cast<vectorized::ColumnObject&>(
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
: assert_cast<vectorized::ColumnObject&>(*dst);
- if (obj.is_null_root()) {
- obj.create_root();
- }
- if (!obj.is_finalized()) {
- obj.finalize();
- }
- auto root_column = obj.get_root();
- RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
- obj.incr_num_rows(*n);
- for (auto& entry : obj.get_subcolumns()) {
- if (entry->data.size() != size + *n) {
- entry->data.insert_many_defaults(*n);
- }
- }
+
// fill nullmap
if (root_column->is_nullable() && dst->is_nullable()) {
vectorized::ColumnUInt8& dst_null_map =
@@ -1611,47 +1598,57 @@ Status VariantRootColumnIterator::next_batch(size_t* n,
vectorized::MutableColum
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
}
+
+ // add root column to a tmp object column
+ auto tmp = vectorized::ColumnObject::create(true, false);
+ auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
+ tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
+
+ // merge tmp object column to dst
+ obj.insert_range_from(*tmp, 0, tmp->size());
+
+ // finalize object if needed
+ if (!obj.is_finalized()) {
+ obj.finalize();
+ }
+
#ifndef NDEBUG
obj.check_consistency();
#endif
+
return Status::OK();
}
+Status VariantRootColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
+ bool* has_null) {
+ // read root column
+ auto& obj =
+ dst->is_nullable()
+ ? assert_cast<vectorized::ColumnObject&>(
+
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+ : assert_cast<vectorized::ColumnObject&>(*dst);
+
+ auto most_common_type = obj.get_most_common_type();
+ auto root_column = most_common_type->create_column();
+ RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
+
+ return _process_root_column(dst, root_column, most_common_type);
+}
+
Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const
size_t count,
vectorized::MutableColumnPtr&
dst) {
- size_t size = dst->size();
+ // read root column
auto& obj =
dst->is_nullable()
? assert_cast<vectorized::ColumnObject&>(
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
: assert_cast<vectorized::ColumnObject&>(*dst);
- if (obj.is_null_root()) {
- obj.create_root();
- }
- if (!obj.is_finalized()) {
- obj.finalize();
- }
- auto root_column = obj.get_root();
+
+ auto most_common_type = obj.get_most_common_type();
+ auto root_column = most_common_type->create_column();
RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));
- obj.incr_num_rows(count);
- for (auto& entry : obj.get_subcolumns()) {
- if (entry->data.size() != (size + count)) {
- entry->data.insert_many_defaults(count);
- }
- }
- // fill nullmap
- if (root_column->is_nullable() && dst->is_nullable()) {
- vectorized::ColumnUInt8& dst_null_map =
-
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
- vectorized::ColumnUInt8& src_null_map =
-
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
- DCHECK_EQ(src_null_map.size() - size, count);
- dst_null_map.insert_range_from(src_null_map, size, count);
- }
-#ifndef NDEBUG
- obj.check_consistency();
-#endif
- return Status::OK();
+
+ return _process_root_column(dst, root_column, most_common_type);
}
Status DefaultNestedColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst) {
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index 6727ea7dc81..187490d06fb 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -654,6 +654,9 @@ public:
ordinal_t get_current_ordinal() const override { return
_inner_iter->get_current_ordinal(); }
private:
+ Status _process_root_column(vectorized::MutableColumnPtr& dst,
+ vectorized::MutableColumnPtr& root_column,
+ const vectorized::DataTypePtr&
most_common_type);
std::unique_ptr<FileColumnIterator> _inner_iter;
};
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index 134b096d8ba..ef10e3b5f7b 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1803,6 +1803,12 @@ void ColumnObject::create_root(const DataTypePtr& type,
MutableColumnPtr&& colum
add_sub_column({}, std::move(column), type);
}
+DataTypePtr ColumnObject::get_most_common_type() const {
+ auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
+ : std::make_shared<MostCommonType>();
+ return type;
+}
+
bool ColumnObject::is_null_root() const {
auto* root = subcolumns.get_root();
if (root == nullptr) {
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 96a27e44e92..564082b0ef4 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -294,6 +294,8 @@ public:
// create root with type and column if missing
void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
+ DataTypePtr get_most_common_type() const;
+
// root is null or type nothing
bool is_null_root() const;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]