This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2048fbcca72 [fix](agg function) incorrect result of map agg (#39743)
2048fbcca72 is described below
commit 2048fbcca72e8ffc1222ecce3f9db6ee37498ad3
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Aug 23 17:36:27 2024 +0800
[fix](agg function) incorrect result of map agg (#39743)
## Proposed changes
In the function `void add(const Field& key_, const Field& value)`, it
should not return while the key exists in the set.
---
.../aggregate_functions/aggregate_function_map.h | 15 +++++------
.../data/query_p0/aggregate/map_agg.out | 5 ++++
.../suites/query_p0/aggregate/map_agg.groovy | 31 ++++++++++++++++++++++
3 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h
b/be/src/vec/aggregate_functions/aggregate_function_map.h
index ca962bd3207..83e96a8d236 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_map.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_map.h
@@ -18,7 +18,6 @@
#pragma once
#include <parallel_hashmap/phmap.h>
-#include <string.h>
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
@@ -92,7 +91,7 @@ struct AggregateFunctionMapAggData {
}
if (UNLIKELY(_map.find(key) != _map.end())) {
- return;
+ continue;
}
key.data = _arena.insert(key.data, key.size);
@@ -161,9 +160,7 @@ struct AggregateFunctionMapAggData {
StringRef key;
for (size_t i = 0; i < size; i++) {
read_binary(key, buf);
- if (_map.find(key) != _map.cend()) {
- continue;
- }
+ DCHECK(_map.find(key) == _map.cend());
key.data = _arena.insert(key.data, key.size);
assert_cast<KeyColumnType&,
TypeCheckOnRelease::DISABLE>(*_key_column)
.insert_data(key.data, key.size);
@@ -208,9 +205,9 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
Arena* arena) const override {
if (columns[0]->is_nullable()) {
- auto& nullable_col =
+ const auto& nullable_col =
assert_cast<const ColumnNullable&,
TypeCheckOnRelease::DISABLE>(*columns[0]);
- auto& nullable_map = nullable_col.get_null_map_data();
+ const auto& nullable_map = nullable_col.get_null_map_data();
if (nullable_map[row_num]) {
return;
}
@@ -267,7 +264,7 @@ public:
void deserialize_from_column(AggregateDataPtr places, const IColumn&
column, Arena* arena,
size_t num_rows) const override {
- auto& col = assert_cast<const ColumnMap&>(column);
+ const auto& col = assert_cast<const ColumnMap&>(column);
auto* data = &(this->data(places));
for (size_t i = 0; i != num_rows; ++i) {
auto map = doris::vectorized::get<Map>(col[i]);
@@ -298,7 +295,7 @@ public:
Arena* arena) const override {
DCHECK(end <= column.size() && begin <= end)
<< ", begin:" << begin << ", end:" << end << ",
column.size():" << column.size();
- auto& col = assert_cast<const ColumnMap&>(column);
+ const auto& col = assert_cast<const ColumnMap&>(column);
for (size_t i = begin; i <= end; ++i) {
auto map = doris::vectorized::get<Map>(col[i]);
this->data(place).add(map[0], map[1]);
diff --git a/regression-test/data/query_p0/aggregate/map_agg.out
b/regression-test/data/query_p0/aggregate/map_agg.out
index c7db1dcaeae..186e888b57f 100644
--- a/regression-test/data/query_p0/aggregate/map_agg.out
+++ b/regression-test/data/query_p0/aggregate/map_agg.out
@@ -43,3 +43,8 @@ V5_3
-- !multi --
1 2
+-- !test_dumplicate --
+1 \N
+2 bddd
+3 \N
+
diff --git a/regression-test/suites/query_p0/aggregate/map_agg.groovy
b/regression-test/suites/query_p0/aggregate/map_agg.groovy
index 351c077df10..385a40b065f 100644
--- a/regression-test/suites/query_p0/aggregate/map_agg.groovy
+++ b/regression-test/suites/query_p0/aggregate/map_agg.groovy
@@ -322,4 +322,35 @@ suite("map_agg") {
, day
) t order by 1, 2;
"""
+
+ sql "DROP TABLE IF EXISTS `test_map_agg_2`;"
+ sql """
+ CREATE TABLE `test_map_agg_2` (
+ `k1` int NULL,
+ `k2` int NULL,
+ `v1` text NULL,
+ `v2` text NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`, `k2`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 4
+ PROPERTIES ( 'replication_num' = '1');
+ """
+
+ sql """
+ insert into `test_map_agg_2` values
+ ( 3 , 1 , 'k' , 'j' ),
+ ( 3 , 2 , 'a' , 'a3' ),
+ ( 5 , 2 , 'a' , 'a5' ),
+ ( 1 , 1 , 'ee' , 'nn' ),
+ ( 1 , 1 , 'a' , 'b' ),
+ ( 1 , 2 , 'a' , 'b' ),
+ ( 1 , 3 , 'c' , 'c' ),
+ ( 2 , 1 , 'e' , 'f' ),
+ ( 2 , 2 , 'a' , 'a2' ),
+ ( 4 , 2 , 'b' , 'bddd' ),
+ ( 4 , 2 , 'a' , 'a4' );
+ """
+
+ sql "set experimental_ignore_storage_data_distribution = 0;"
+ qt_test_dumplicate "select k2, m['b'] from (select k2, map_agg(v1, v2) m
from `test_map_agg_2` group by k2) a order by k2;"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]