This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fa3c7d98c8b [fix](map) the implementation of ColumnMap::replicate was
incorrect" (#26647)
fa3c7d98c8b is described below
commit fa3c7d98c8b7002a4aac7250908d6ecacf3913d7
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Nov 13 12:17:14 2023 +0800
[fix](map) the implementation of ColumnMap::replicate was incorrect"
(#26647)
---
be/src/vec/columns/column_array.cpp | 51 +++++++++-----------
be/src/vec/columns/column_map.cpp | 22 ++++++---
.../data/datatype_p0/complex_types/test_map.out | 16 +++++++
.../data/nereids_p0/datatype/test_map.out | 16 +++++++
.../datatype_p0/complex_types/test_map.groovy | 54 +++++++++++++++++++++
.../suites/nereids_p0/datatype/test_map.groovy | 56 ++++++++++++++++++++++
6 files changed, 180 insertions(+), 35 deletions(-)
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index 374dd17a88a..fa9e0486367 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -833,42 +833,35 @@ ColumnPtr ColumnArray::replicate(const IColumn::Offsets&
replicate_offsets) cons
return replicate_generic(replicate_offsets);
}
-void ColumnArray::replicate(const uint32_t* indexs, size_t target_size,
IColumn& column) const {
+void ColumnArray::replicate(const uint32_t* indices, size_t target_size,
IColumn& column) const {
if (target_size == 0) {
return;
}
- auto total_size = get_offsets().size();
- //
|---------------------|-------------------------|-------------------------|
- // [0, begin) [begin, begin + count_sz) [begin + count_sz,
size())
- // do not need to copy copy counts[n] times do not need to copy
- IColumn::Offsets replicate_offsets(total_size, 0);
- // copy original data at offset n counts[n] times
- auto begin = 0, end = 0;
- while (begin < target_size) {
- while (end < target_size && indexs[begin] == indexs[end]) {
- end++;
- }
- long index = indexs[begin];
- replicate_offsets[index] = end - begin;
- begin = end;
- }
- // ignored
- for (size_t i = 1; i < total_size; ++i) {
- replicate_offsets[i] += replicate_offsets[i - 1];
- }
+ auto& dst_col = assert_cast<ColumnArray&>(column);
+ auto& dst_data_col = dst_col.get_data();
+ auto& dst_offsets = dst_col.get_offsets();
+ dst_offsets.reserve(target_size);
- auto rep_res = replicate(replicate_offsets);
- if (!rep_res) {
- LOG(WARNING) << "ColumnArray replicate failed, replicate_offsets
count="
- << replicate_offsets.size() << ", max=" <<
replicate_offsets.back();
- return;
+ PODArray<uint32> data_indices_to_replicate;
+
+ for (size_t i = 0; i < target_size; ++i) {
+ const auto index = indices[i];
+ const auto start = offset_at(index);
+ const auto length = size_at(index);
+ dst_offsets.push_back(dst_offsets.back() + length);
+ if (UNLIKELY(length == 0)) {
+ continue;
+ }
+
+ data_indices_to_replicate.reserve(data_indices_to_replicate.size() +
length);
+ for (size_t j = start; j != start + length; ++j) {
+ data_indices_to_replicate.push_back(j);
+ }
}
- auto& rep_res_arr = typeid_cast<const ColumnArray&>(*rep_res);
- ColumnArray& res_arr = typeid_cast<ColumnArray&>(column);
- res_arr.data = rep_res_arr.get_data_ptr();
- res_arr.offsets = rep_res_arr.get_offsets_ptr();
+ get_data().replicate(data_indices_to_replicate.data(),
data_indices_to_replicate.size(),
+ dst_data_col);
}
template <typename T>
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index 47a41c3dcfe..f3fa5ab6c26 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -433,14 +433,24 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets)
const {
return res;
}
-void ColumnMap::replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const {
+void ColumnMap::replicate(const uint32_t* indices, size_t target_size,
IColumn& column) const {
auto& res = reinterpret_cast<ColumnMap&>(column);
- // Make a temp column array for reusing its replicate function
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable())
- ->replicate(indexs, target_size,
res.keys_column->assume_mutable_ref());
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable())
- ->replicate(indexs, target_size,
res.values_column->assume_mutable_ref());
+ auto keys_array =
+ ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable());
+
+ auto result_array = ColumnArray::create(res.keys_column->assume_mutable(),
+
res.offsets_column->assume_mutable());
+ keys_array->replicate(indices, target_size,
result_array->assume_mutable_ref());
+
+ result_array = ColumnArray::create(res.values_column->assume_mutable(),
+ res.offsets_column->clone_empty());
+
+ auto values_array =
+ ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable());
+
+ /// FIXME: To reuse the replicate of ColumnArray, the offsets column was
replicated twice
+ values_array->replicate(indices, target_size,
result_array->assume_mutable_ref());
}
MutableColumnPtr ColumnMap::get_shrinked_column() {
diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out
b/regression-test/data/datatype_p0/complex_types/test_map.out
new file mode 100644
index 00000000000..00a74e74af0
--- /dev/null
+++ b/regression-test/data/datatype_p0/complex_types/test_map.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 {"key1":"value1"} 1 1
+1 {"key1_1":"value1_1"} 1 1
+1 {"key1":"value1"} 2 1
+1 {"key1_1":"value1_1"} 2 1
+2 {"key2":"value2", "key22":"value22"} 3 2
+2 {"key2_1":"value2_1", "key22_1":"value22_1"} 3 2
+2 {"key2_2":"value2_2", "key22_2":"value22_2"} 3 2
+2 {"key2":"value2", "key22":"value22"} 4 2
+2 {"key2_1":"value2_1", "key22_1":"value22_1"} 4 2
+2 {"key2_2":"value2_2", "key22_2":"value22_2"} 4 2
+3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 5
3
+3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 6
3
+4 {"key4":"value4", "key44":"value44", "key444":"value444",
"key4444":"value4444"} \N \N
+
diff --git a/regression-test/data/nereids_p0/datatype/test_map.out
b/regression-test/data/nereids_p0/datatype/test_map.out
new file mode 100644
index 00000000000..00a74e74af0
--- /dev/null
+++ b/regression-test/data/nereids_p0/datatype/test_map.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 {"key1":"value1"} 1 1
+1 {"key1_1":"value1_1"} 1 1
+1 {"key1":"value1"} 2 1
+1 {"key1_1":"value1_1"} 2 1
+2 {"key2":"value2", "key22":"value22"} 3 2
+2 {"key2_1":"value2_1", "key22_1":"value22_1"} 3 2
+2 {"key2_2":"value2_2", "key22_2":"value22_2"} 3 2
+2 {"key2":"value2", "key22":"value22"} 4 2
+2 {"key2_1":"value2_1", "key22_1":"value22_1"} 4 2
+2 {"key2_2":"value2_2", "key22_2":"value22_2"} 4 2
+3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 5
3
+3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 6
3
+4 {"key4":"value4", "key44":"value44", "key444":"value444",
"key4444":"value4444"} \N \N
+
diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy
b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
new file mode 100644
index 00000000000..6a3d28c5ed0
--- /dev/null
+++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map") {
+ sql 'set enable_nereids_planner=false'
+ sql "DROP TABLE IF EXISTS `test_map_table`"
+ sql """
+ create table `test_map_table` (
+ `k1` int,
+ `value` map<text, text>
+ ) distributed by hash(`k1`) buckets 1 properties("replication_num" =
"1");
+ """
+
+ sql 'insert into `test_map_table` values (1, {"key1": "value1"});'
+ sql 'insert into `test_map_table` values (1, {"key1_1": "value1_1"});'
+ sql 'insert into `test_map_table` values (2, {"key2": "value2", "key22":
"value22"});'
+ sql 'insert into `test_map_table` values (2, {"key2_1": "value2_1",
"key22_1": "value22_1"});'
+ sql 'insert into `test_map_table` values (2, {"key2_2": "value2_2",
"key22_2": "value22_2"});'
+ sql 'insert into `test_map_table` values (3, {"key3": "value3", "key33":
"value33", "key3333": "value333"});'
+ sql 'insert into `test_map_table` values (4, {"key4": "value4", "key44":
"value44", "key444": "value444", "key4444": "value4444"});'
+
+ sql "DROP TABLE IF EXISTS `test_map_table_right`"
+ sql """
+ create table `test_map_table_right` (
+ `id` int,
+ `value` int
+ ) distributed by hash(`id`) buckets 1 properties("replication_num" =
"1");
+ """
+
+ sql 'insert into `test_map_table_right` values(1, 1);'
+ sql 'insert into `test_map_table_right` values(2, 1);'
+ sql 'insert into `test_map_table_right` values(3, 2);'
+ sql 'insert into `test_map_table_right` values(4, 2);'
+ sql 'insert into `test_map_table_right` values(5, 3);'
+ sql 'insert into `test_map_table_right` values(6, 3);'
+
+ qt_sql """
+ select * from test_map_table left join test_map_table_right on
test_map_table.k1 = test_map_table_right.value order by 1,3;
+ """
+}
diff --git a/regression-test/suites/nereids_p0/datatype/test_map.groovy
b/regression-test/suites/nereids_p0/datatype/test_map.groovy
new file mode 100644
index 00000000000..54f41e421f3
--- /dev/null
+++ b/regression-test/suites/nereids_p0/datatype/test_map.groovy
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map") {
+ sql 'set enable_nereids_planner=true'
+ sql 'set enable_fallback_to_original_planner=false'
+
+ sql "DROP TABLE IF EXISTS `test_map_table`"
+ sql """
+ create table `test_map_table` (
+ `k1` int,
+ `value` map<text, text>
+ ) distributed by hash(`k1`) buckets 1 properties("replication_num" =
"1");
+ """
+
+ sql 'insert into `test_map_table` values (1, {"key1": "value1"});'
+ sql 'insert into `test_map_table` values (1, {"key1_1": "value1_1"});'
+ sql 'insert into `test_map_table` values (2, {"key2": "value2", "key22":
"value22"});'
+ sql 'insert into `test_map_table` values (2, {"key2_1": "value2_1",
"key22_1": "value22_1"});'
+ sql 'insert into `test_map_table` values (2, {"key2_2": "value2_2",
"key22_2": "value22_2"});'
+ sql 'insert into `test_map_table` values (3, {"key3": "value3", "key33":
"value33", "key3333": "value333"});'
+ sql 'insert into `test_map_table` values (4, {"key4": "value4", "key44":
"value44", "key444": "value444", "key4444": "value4444"});'
+
+ sql "DROP TABLE IF EXISTS `test_map_table_right`"
+ sql """
+ create table `test_map_table_right` (
+ `id` int,
+ `value` int
+ ) distributed by hash(`id`) buckets 1 properties("replication_num" =
"1");
+ """
+
+ sql 'insert into `test_map_table_right` values(1, 1);'
+ sql 'insert into `test_map_table_right` values(2, 1);'
+ sql 'insert into `test_map_table_right` values(3, 2);'
+ sql 'insert into `test_map_table_right` values(4, 2);'
+ sql 'insert into `test_map_table_right` values(5, 3);'
+ sql 'insert into `test_map_table_right` values(6, 3);'
+
+ qt_sql """
+ select * from test_map_table left join test_map_table_right on
test_map_table.k1 = test_map_table_right.value order by 1,3;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]