This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 91a6fb3af0a [fix](hashmap)Fix the agg hashmap iterator so it works
correctly. (#61215)
91a6fb3af0a is described below
commit 91a6fb3af0a84adce7a18d8538a2d5e7b2f743e7
Author: Mryange <[email protected]>
AuthorDate: Thu Mar 12 12:07:33 2026 +0800
[fix](hashmap)Fix the agg hashmap iterator so it works correctly. (#61215)
### What problem does this PR solve?
Previously, for agg hashmap<key, value>, when using the iterator, we
could only iterate over the values but not the keys.
In addition, there is a hidden bug in the string hashmap (it just never
got triggered before because we couldn’t iterate over the keys; I
triggered it with BEUT in another PR
https://github.com/apache/doris/pull/61058 ).
This PR is a prerequisite; we will need a complete iterator for some
upcoming features.
---
be/src/exec/common/hash_table/ph_hash_map.h | 6 +
be/src/exec/common/hash_table/string_hash_map.h | 25 +-
be/src/exec/common/hash_table/string_hash_table.h | 14 +-
be/test/exec/hash_map/hash_table_method_test.cpp | 616 ++++++++++++++++++++++
4 files changed, 656 insertions(+), 5 deletions(-)
diff --git a/be/src/exec/common/hash_table/ph_hash_map.h
b/be/src/exec/common/hash_table/ph_hash_map.h
index 92a6b5f9557..212b59609c9 100644
--- a/be/src/exec/common/hash_table/ph_hash_map.h
+++ b/be/src/exec/common/hash_table/ph_hash_map.h
@@ -188,6 +188,12 @@ public:
for (auto& v : *this) func(v.get_second());
}
+ /// Call func(const Key &, Mapped &) for each hash map element.
+ template <typename Func>
+ void for_each(Func&& func) {
+ for (auto& v : *this) func(v.get_first(), v.get_second());
+ }
+
size_t get_buffer_size_in_bytes() const {
const auto capacity = _hash_map.capacity();
return capacity * sizeof(typename HashMapImpl::slot_type);
diff --git a/be/src/exec/common/hash_table/string_hash_map.h
b/be/src/exec/common/hash_table/string_hash_map.h
index c615e6b5dcc..1c1dfce50dd 100644
--- a/be/src/exec/common/hash_table/string_hash_map.h
+++ b/be/src/exec/common/hash_table/string_hash_map.h
@@ -68,7 +68,7 @@ struct StringHashMapCell<doris::StringRef, TMapped>
using Base::Base;
static constexpr bool need_zero_value_storage = false;
// external
- using Base::get_key;
+ const doris::StringRef& get_key() const { return this->value.first; } ///
NOLINT
// internal
static const doris::StringRef& get_key(const value_type& value_) { return
value_.first; }
@@ -150,6 +150,29 @@ public:
func(v.get_second());
}
}
+
+ template <typename Func>
+ void for_each(Func&& func) {
+ if (this->m0.size()) {
+ func(this->m0.zero_value()->get_key(),
this->m0.zero_value()->get_second());
+ }
+ for (auto& v : this->m1) {
+ func(v.get_key(), v.get_second());
+ }
+ for (auto& v : this->m2) {
+ func(v.get_key(), v.get_second());
+ }
+ for (auto& v : this->m3) {
+ func(v.get_key(), v.get_second());
+ }
+ for (auto& v : this->m4) {
+ func(v.get_key(), v.get_second());
+ }
+ for (auto& v : this->ms) {
+ func(v.get_key(), v.get_second());
+ }
+ }
+
template <typename MappedType>
char* get_null_key_data() {
return nullptr;
diff --git a/be/src/exec/common/hash_table/string_hash_table.h
b/be/src/exec/common/hash_table/string_hash_table.h
index 166b56bda29..a110291426f 100644
--- a/be/src/exec/common/hash_table/string_hash_table.h
+++ b/be/src/exec/common/hash_table/string_hash_table.h
@@ -50,7 +50,9 @@ StringKey to_string_key(const doris::StringRef& key) {
template <typename T>
inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) {
assert(n != 0);
- return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n)
>> 3)};
+ // __builtin_clzll counts leading zero bits in a 64-bit (8-byte) value,
+ // so we must use 8 here instead of sizeof(T) to get the correct byte
count.
+ return {reinterpret_cast<const char*>(&n), static_cast<size_t>(8 -
(__builtin_clzll(n) >> 3))};
}
inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
assert(n.items[1] != 0);
@@ -415,7 +417,7 @@ protected:
return static_cast<Derived&>(*this);
}
- auto& operator*() const {
+ auto& operator*() {
switch (sub_table_index) {
case 0: {
this->cell = *(container->m0.zero_value());
@@ -444,9 +446,13 @@ protected:
}
return cell;
}
- auto* operator->() const { return &(this->operator*()); }
+ auto* operator->() { return &(this->operator*()); }
- auto get_ptr() const { return &(this->operator*()); }
+ auto get_ptr() { return &(this->operator*()); }
+
+ // Provide get_first()/get_second() at the iterator level, consistent
with PHHashMap::iterator
+ auto& get_first() { return (**this).get_first(); }
+ auto& get_second() { return (**this).get_second(); }
size_t get_hash() const {
switch (sub_table_index) {
diff --git a/be/test/exec/hash_map/hash_table_method_test.cpp
b/be/test/exec/hash_map/hash_table_method_test.cpp
index 60cc2284aa2..697ce283751 100644
--- a/be/test/exec/hash_map/hash_table_method_test.cpp
+++ b/be/test/exec/hash_map/hash_table_method_test.cpp
@@ -20,6 +20,7 @@
#include <set>
#include "core/data_type/data_type_number.h"
+#include "exec/common/agg_utils.h"
#include "exec/common/columns_hashing.h"
#include "exec/common/hash_table/hash.h"
#include "exec/common/hash_table/hash_map_context.h"
@@ -221,4 +222,619 @@ TEST(HashTableMethodTest,
testNullableIteratorSkipsNullKey) {
}
}
+// Helper: create distinguishable AggregateDataPtr values for testing
+static AggregateDataPtr make_mapped(size_t val) {
+ return reinterpret_cast<AggregateDataPtr>(val);
+}
+
+// ========== MethodOneNumber<UInt32, AggData<UInt32>> ==========
+// AggData<UInt32> = PHHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>
+TEST(HashTableMethodTest, testMethodOneNumberAggInsertFindForEach) {
+ MethodOneNumber<UInt32, AggData<UInt32>> method;
+ using State = MethodOneNumber<UInt32, AggData<UInt32>>::State;
+
+ auto col = ColumnHelper::create_column<DataTypeInt32>({10, 20, 30, 40,
50});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 5;
+
+ // Insert
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 1));
+ },
+ [](auto& mapped) { FAIL() << "unexpected null"; });
+ }
+ }
+
+ // Find existing keys
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ auto result = method.find(state, i);
+ ASSERT_TRUE(result.is_found());
+ EXPECT_EQ(result.get_mapped(), make_mapped(i + 1));
+ }
+ }
+
+ // Find non-existing key
+ {
+ auto miss_col = ColumnHelper::create_column<DataTypeInt32>({999});
+ ColumnRawPtrs miss_columns = {miss_col.get()};
+ State state(miss_columns);
+ method.init_serialized_keys(miss_columns, 1);
+ auto result = method.find(state, 0);
+ EXPECT_FALSE(result.is_found());
+ }
+
+ // for_each
+ {
+ size_t count = 0;
+ method.hash_table->for_each([&](const auto& key, auto& mapped) {
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 5);
+ }
+
+ // for_each_mapped
+ {
+ size_t count = 0;
+ method.hash_table->for_each_mapped([&](auto& mapped) {
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 5);
+ }
+}
+
+// ========== MethodOneNumber Phase2 (HashMixWrapper) ==========
+// AggregatedDataWithUInt32KeyPhase2 = PHHashMap<UInt32, AggregateDataPtr,
HashMixWrapper<UInt32>>
+TEST(HashTableMethodTest, testMethodOneNumberPhase2AggInsertFindForEach) {
+ MethodOneNumber<UInt32, AggregatedDataWithUInt32KeyPhase2> method;
+ using State = MethodOneNumber<UInt32,
AggregatedDataWithUInt32KeyPhase2>::State;
+
+ auto col = ColumnHelper::create_column<DataTypeInt32>({100, 200, 300});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 3;
+
+ // Insert
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 100));
+ },
+ [](auto& mapped) { FAIL(); });
+ }
+ }
+
+ // Find
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ auto result = method.find(state, i);
+ ASSERT_TRUE(result.is_found());
+ EXPECT_EQ(result.get_mapped(), make_mapped(i + 100));
+ }
+ }
+
+ // for_each + for_each_mapped
+ {
+ size_t count = 0;
+ method.hash_table->for_each([&](const auto& key, auto& mapped) {
count++; });
+ EXPECT_EQ(count, 3);
+ }
+ {
+ size_t count = 0;
+ method.hash_table->for_each_mapped([&](auto& mapped) { count++; });
+ EXPECT_EQ(count, 3);
+ }
+}
+
+// ========== MethodStringNoCache<AggregatedDataWithShortStringKey> ==========
+// AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>
+TEST(HashTableMethodTest, testMethodStringNoCacheAggInsertFindForEach) {
+ MethodStringNoCache<AggregatedDataWithShortStringKey> method;
+ using State = MethodStringNoCache<AggregatedDataWithShortStringKey>::State;
+
+ // Include strings of varying lengths to exercise different StringHashMap
sub-maps
+ auto col = ColumnHelper::create_column<DataTypeString>(
+ {"hello", "world", "foo", "bar", "longstring_exceeding_16_bytes"});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 5;
+
+ // Insert
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 10));
+ },
+ [](auto& mapped) { FAIL(); });
+ }
+ }
+
+ // Find
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ auto result = method.find(state, i);
+ ASSERT_TRUE(result.is_found());
+ EXPECT_EQ(result.get_mapped(), make_mapped(i + 10));
+ }
+ }
+
+ // for_each
+ {
+ size_t count = 0;
+ method.hash_table->for_each([&](const auto& key, auto& mapped) {
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 5);
+ }
+
+ // for_each_mapped
+ {
+ size_t count = 0;
+ method.hash_table->for_each_mapped([&](auto& mapped) { count++; });
+ EXPECT_EQ(count, 5);
+ }
+}
+
+// ========== MethodSerialized<AggregatedDataWithStringKey> ==========
+// AggregatedDataWithStringKey = PHHashMap<StringRef, AggregateDataPtr>
+// StringRef keys require arena persistence to survive across
init_serialized_keys calls.
+TEST(HashTableMethodTest, testMethodSerializedAggInsertFindForEach) {
+ MethodSerialized<AggregatedDataWithStringKey> method;
+ using State = MethodSerialized<AggregatedDataWithStringKey>::State;
+
+ auto col1 = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3});
+ auto col2 = ColumnHelper::create_column<DataTypeString>({"a", "bb",
"ccc"});
+ ColumnRawPtrs key_columns = {col1.get(), col2.get()};
+ const size_t rows = 3;
+
+ // Use a separate arena to persist StringRef key data
+ Arena persist_arena;
+
+ // Insert
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ method.try_presis_key_and_origin(key, origin,
persist_arena);
+ ctor(key, make_mapped(i + 50));
+ },
+ [](auto& mapped) { FAIL(); });
+ }
+ }
+
+ // for_each (keys backed by persist_arena)
+ {
+ size_t count = 0;
+ method.hash_table->for_each([&](const auto& key, auto& mapped) {
+ EXPECT_GT(key.size, 0);
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 3);
+ }
+
+ // for_each_mapped
+ {
+ size_t count = 0;
+ method.hash_table->for_each_mapped([&](auto& mapped) {
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 3);
+ }
+
+ // Find (re-init serialized keys for lookup)
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ auto result = method.find(state, i);
+ ASSERT_TRUE(result.is_found());
+ EXPECT_EQ(result.get_mapped(), make_mapped(i + 50));
+ }
+ }
+}
+
+// ========== MethodKeysFixed<AggData<UInt64>> ==========
+// Fixed-width multi-column keys packed into UInt64
+TEST(HashTableMethodTest, testMethodKeysFixedAggInsertFindForEach) {
+ MethodKeysFixed<AggData<UInt64>> method(Sizes {sizeof(int32_t),
sizeof(int32_t)});
+ using State = MethodKeysFixed<AggData<UInt64>>::State;
+
+ auto col1 = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3, 4});
+ auto col2 = ColumnHelper::create_column<DataTypeInt32>({10, 20, 30, 40});
+ ColumnRawPtrs key_columns = {col1.get(), col2.get()};
+ const size_t rows = 4;
+
+ // Insert
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 200));
+ },
+ [](auto& mapped) { FAIL(); });
+ }
+ }
+
+ // Find
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ auto result = method.find(state, i);
+ ASSERT_TRUE(result.is_found());
+ EXPECT_EQ(result.get_mapped(), make_mapped(i + 200));
+ }
+ }
+
+ // for_each
+ {
+ size_t count = 0;
+ method.hash_table->for_each([&](const auto& key, auto& mapped) {
+ EXPECT_NE(mapped, nullptr);
+ count++;
+ });
+ EXPECT_EQ(count, 4);
+ }
+}
+
+// ========== Nullable MethodOneNumber (MethodSingleNullableColumn +
MethodOneNumber) ==========
+// AggDataNullable<UInt32> = DataWithNullKey<PHHashMap<UInt32,
AggregateDataPtr, HashCRC32<UInt32>>>
+// Tests null key insertion, find, and for_each (which excludes null from
PHHashMap::for_each).
+TEST(HashTableMethodTest, testNullableMethodOneNumberAggInsertFindForEach) {
+ using NullableMethod =
+ MethodSingleNullableColumn<MethodOneNumber<UInt32,
AggDataNullable<UInt32>>>;
+ NullableMethod method;
+ using State = NullableMethod::State;
+
+ // values: {10, 20, 30, 40, 50}, null at rows 1 and 3
+ auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({10, 20,
30, 40, 50},
+ {0, 1, 0,
1, 0});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 5;
+
+ // Insert
+ size_t null_create_count = 0;
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 1));
+ },
+ [&](auto& mapped) {
+ null_create_count++;
+ mapped = make_mapped(999);
+ });
+ }
+ }
+
+ // null_creator called once for first null row (index 1); second null
(index 3) is deduplicated
+ EXPECT_EQ(null_create_count, 1);
+
+ auto& ht = *method.hash_table;
+ EXPECT_TRUE(ht.has_null_key_data());
+ EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(999));
+
+ // Find
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+
+ // row 0: key=10, non-null
+ auto r0 = method.find(state, 0);
+ ASSERT_TRUE(r0.is_found());
+ EXPECT_EQ(r0.get_mapped(), make_mapped(1));
+
+ // row 1: null → returns null key data
+ auto r1 = method.find(state, 1);
+ ASSERT_TRUE(r1.is_found());
+ EXPECT_EQ(r1.get_mapped(), make_mapped(999));
+
+ // row 2: key=30, non-null
+ auto r2 = method.find(state, 2);
+ ASSERT_TRUE(r2.is_found());
+ EXPECT_EQ(r2.get_mapped(), make_mapped(3));
+
+ // row 4: key=50, non-null
+ auto r4 = method.find(state, 4);
+ ASSERT_TRUE(r4.is_found());
+ EXPECT_EQ(r4.get_mapped(), make_mapped(5));
+ }
+
+ // for_each_mapped: PHHashMap::for_each_mapped iterates only non-null keys
+ {
+ size_t count = 0;
+ ht.for_each_mapped([&](auto& mapped) { count++; });
+ EXPECT_EQ(count, 3); // keys 10, 30, 50
+ }
+
+ // DataWithNullKey::size() includes null key
+ EXPECT_EQ(ht.size(), 4); // 3 non-null + 1 null
+}
+
+// ========== Nullable MethodStringNoCache ==========
+// AggregatedDataWithNullableShortStringKey =
DataWithNullKey<StringHashMap<AggregateDataPtr>>
+TEST(HashTableMethodTest, testNullableMethodStringNoCacheAggInsertFindForEach)
{
+ using NullableMethod = MethodSingleNullableColumn<
+ MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>;
+ NullableMethod method;
+ using State = NullableMethod::State;
+
+ // values: {"hello", <null>, "world", <null>}
+ auto col = ColumnHelper::create_nullable_column<DataTypeString>({"hello",
"", "world", ""},
+ {0, 1, 0,
1});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 4;
+
+ // Insert
+ size_t null_create_count = 0;
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) {
+ ctor(key, make_mapped(i + 1));
+ },
+ [&](auto& mapped) {
+ null_create_count++;
+ mapped = make_mapped(888);
+ });
+ }
+ }
+
+ EXPECT_EQ(null_create_count, 1);
+
+ auto& ht = *method.hash_table;
+ EXPECT_TRUE(ht.has_null_key_data());
+ EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(888));
+
+ // Find
+ {
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+
+ // row 0: "hello"
+ auto r0 = method.find(state, 0);
+ ASSERT_TRUE(r0.is_found());
+ EXPECT_EQ(r0.get_mapped(), make_mapped(1));
+
+ // row 1: null
+ auto r1 = method.find(state, 1);
+ ASSERT_TRUE(r1.is_found());
+ EXPECT_EQ(r1.get_mapped(), make_mapped(888));
+
+ // row 2: "world"
+ auto r2 = method.find(state, 2);
+ ASSERT_TRUE(r2.is_found());
+ EXPECT_EQ(r2.get_mapped(), make_mapped(3));
+ }
+
+ // for_each: StringHashMap::for_each iterates only non-null keys
+ {
+ size_t count = 0;
+ ht.for_each([&](const auto& key, auto& mapped) { count++; });
+ EXPECT_EQ(count, 2); // "hello" and "world"
+ }
+
+ // DataWithNullKey::size() includes null key
+ EXPECT_EQ(ht.size(), 3); // 2 non-null + 1 null
+}
+
+// ========== PHHashMap iterator: traverse, sort, verify, assignment ==========
+TEST(HashTableMethodTest, testPHHashMapIterator) {
+ MethodOneNumber<UInt32, AggData<UInt32>> method;
+ using State = MethodOneNumber<UInt32, AggData<UInt32>>::State;
+
+ auto col = ColumnHelper::create_column<DataTypeInt32>({50, 10, 40, 20,
30});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 5;
+
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) { ctor(key,
make_mapped(i + 1)); },
+ [](auto& mapped) { FAIL(); });
+ }
+
+ auto& ht = *method.hash_table;
+
+ // Collect all (key, mapped) pairs via iterator
+ std::vector<std::pair<UInt32, AggregateDataPtr>> entries;
+ for (auto it = ht.begin(); it != ht.end(); ++it) {
+ entries.emplace_back(it->get_first(), it->get_second());
+ }
+ ASSERT_EQ(entries.size(), 5);
+
+ // Sort by key and verify
+ std::sort(entries.begin(), entries.end(),
+ [](const auto& a, const auto& b) { return a.first < b.first; });
+ // Inserted: {50→1, 10→2, 40→3, 20→4, 30→5}
+ EXPECT_EQ(entries[0].first, 10);
+ EXPECT_EQ(entries[0].second, make_mapped(2));
+ EXPECT_EQ(entries[1].first, 20);
+ EXPECT_EQ(entries[1].second, make_mapped(4));
+ EXPECT_EQ(entries[2].first, 30);
+ EXPECT_EQ(entries[2].second, make_mapped(5));
+ EXPECT_EQ(entries[3].first, 40);
+ EXPECT_EQ(entries[3].second, make_mapped(3));
+ EXPECT_EQ(entries[4].first, 50);
+ EXPECT_EQ(entries[4].second, make_mapped(1));
+
+ // Iterator assignment: it = begin(), it2 = it
+ auto it = ht.begin();
+ auto it2 = it; // copy
+ EXPECT_TRUE(it == it2);
+ EXPECT_EQ(it->get_first(), it2->get_first());
+ EXPECT_EQ(it->get_second(), it2->get_second());
+
+ ++it;
+ EXPECT_TRUE(it != it2); // diverged after increment
+
+ it2 = it; // reassignment
+ EXPECT_TRUE(it == it2);
+ EXPECT_EQ(it->get_first(), it2->get_first());
+
+ // Empty hash table: begin == end
+ MethodOneNumber<UInt32, AggData<UInt32>> empty_method;
+ EXPECT_TRUE(empty_method.hash_table->begin() ==
empty_method.hash_table->end());
+}
+
+// ========== StringHashMap iterator: traverse, sort, verify, assignment
==========
+TEST(HashTableMethodTest, testStringHashMapIterator) {
+ MethodStringNoCache<AggregatedDataWithShortStringKey> method;
+ using State = MethodStringNoCache<AggregatedDataWithShortStringKey>::State;
+
+ // Different lengths to hit different sub-maps (m1: <=8, m2: <=16, m3:
<=24, ms: >24)
+ auto col = ColumnHelper::create_column<DataTypeString>(
+ {"z", "ab", "hello_world_12345", "tiny",
"a_very_long_string_over_24_chars"});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 5;
+
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) { ctor(key,
make_mapped(i + 1)); },
+ [](auto& mapped) { FAIL(); });
+ }
+
+ auto& ht = *method.hash_table;
+
+ // Collect all (key_string, mapped) pairs via iterator
+ std::vector<std::pair<std::string, AggregateDataPtr>> entries;
+ for (auto it = ht.begin(); it != ht.end(); ++it) {
+ auto key = it.get_first();
+ entries.emplace_back(std::string(key.data, key.size), it.get_second());
+ }
+ ASSERT_EQ(entries.size(), 5);
+
+ // Sort by key string and verify
+ std::sort(entries.begin(), entries.end(),
+ [](const auto& a, const auto& b) { return a.first < b.first; });
+ // Sorted: "a_very_long...", "ab", "hello_world_12345", "tiny", "z"
+ EXPECT_EQ(entries[0].first, "a_very_long_string_over_24_chars");
+ EXPECT_EQ(entries[0].second, make_mapped(5));
+ EXPECT_EQ(entries[1].first, "ab");
+ EXPECT_EQ(entries[1].second, make_mapped(2));
+ EXPECT_EQ(entries[2].first, "hello_world_12345");
+ EXPECT_EQ(entries[2].second, make_mapped(3));
+ EXPECT_EQ(entries[3].first, "tiny");
+ EXPECT_EQ(entries[3].second, make_mapped(4));
+ EXPECT_EQ(entries[4].first, "z");
+ EXPECT_EQ(entries[4].second, make_mapped(1));
+
+ // Iterator assignment: copy and reassign
+ auto it = ht.begin();
+ auto it2 = it;
+ EXPECT_TRUE(it == it2);
+
+ auto key1 = it.get_first();
+ auto key2 = it2.get_first();
+ EXPECT_EQ(key1, key2);
+
+ ++it;
+ EXPECT_TRUE(it != it2);
+
+ it2 = it; // reassignment
+ EXPECT_TRUE(it == it2);
+
+ // Empty StringHashMap: begin == end
+ MethodStringNoCache<AggregatedDataWithShortStringKey> empty_method;
+ EXPECT_TRUE(empty_method.hash_table->begin() ==
empty_method.hash_table->end());
+}
+
+// ========== DataWithNullKey iterator: only non-null entries, null key
accessed separately ==========
+TEST(HashTableMethodTest, testDataWithNullKeyIterator) {
+ using NullableMethod =
+ MethodSingleNullableColumn<MethodOneNumber<UInt32,
AggDataNullable<UInt32>>>;
+ NullableMethod method;
+ using State = NullableMethod::State;
+
+ // values: {10, 20, 30}, null at row 1
+ auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({10, 20,
30}, {0, 1, 0});
+ ColumnRawPtrs key_columns = {col.get()};
+ const size_t rows = 3;
+
+ State state(key_columns);
+ method.init_serialized_keys(key_columns, rows);
+ for (size_t i = 0; i < rows; i++) {
+ method.lazy_emplace(
+ state, i,
+ [&](const auto& ctor, auto& key, auto& origin) { ctor(key,
make_mapped(i + 1)); },
+ [&](auto& mapped) { mapped = make_mapped(999); });
+ }
+
+ auto& ht = *method.hash_table;
+
+ // Null key is present and must be accessed separately (not via iterator)
+ EXPECT_TRUE(ht.has_null_key_data());
+ EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(999));
+
+ // DataWithNullKey::size() includes null key
+ EXPECT_EQ(ht.size(), 3); // 2 non-null + 1 null
+
+ // Iterator only visits non-null entries
+ std::vector<std::pair<UInt32, AggregateDataPtr>> non_null_entries;
+ for (auto it = ht.begin(); it != ht.end(); ++it) {
+ non_null_entries.emplace_back(it->get_first(), it->get_second());
+ }
+
+ // Only 2 non-null entries in iteration (null key excluded)
+ ASSERT_EQ(non_null_entries.size(), 2);
+ std::sort(non_null_entries.begin(), non_null_entries.end(),
+ [](const auto& a, const auto& b) { return a.first < b.first; });
+ // Inserted: row0=10→1, row2=30→3
+ EXPECT_EQ(non_null_entries[0].first, 10);
+ EXPECT_EQ(non_null_entries[0].second, make_mapped(1));
+ EXPECT_EQ(non_null_entries[1].first, 30);
+ EXPECT_EQ(non_null_entries[1].second, make_mapped(3));
+
+ // Iterator assignment
+ auto it = ht.begin();
+ auto it2 = it;
+ EXPECT_TRUE(it == it2);
+ EXPECT_EQ(it.get_second(), it2.get_second());
+
+ ++it;
+ EXPECT_TRUE(it != it2);
+
+ it2 = it;
+ EXPECT_TRUE(it == it2);
+}
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]