This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 91a6fb3af0a [fix](hashmap)Fix the agg hashmap iterator so it works 
correctly. (#61215)
91a6fb3af0a is described below

commit 91a6fb3af0a84adce7a18d8538a2d5e7b2f743e7
Author: Mryange <[email protected]>
AuthorDate: Thu Mar 12 12:07:33 2026 +0800

    [fix](hashmap)Fix the agg hashmap iterator so it works correctly. (#61215)
    
    ### What problem does this PR solve?
    Previously, for agg hashmap<key, value>, when using the iterator, we
    could only iterate over the values but not the keys.
    
    In addition, there is a hidden bug in the string hashmap (it just never
    got triggered before because we couldn’t iterate over the keys; I
    triggered it with BEUT in another PR
    https://github.com/apache/doris/pull/61058 ).
    
    This PR is a prerequisite; we will need a complete iterator for some
    upcoming features.
---
 be/src/exec/common/hash_table/ph_hash_map.h       |   6 +
 be/src/exec/common/hash_table/string_hash_map.h   |  25 +-
 be/src/exec/common/hash_table/string_hash_table.h |  14 +-
 be/test/exec/hash_map/hash_table_method_test.cpp  | 616 ++++++++++++++++++++++
 4 files changed, 656 insertions(+), 5 deletions(-)

diff --git a/be/src/exec/common/hash_table/ph_hash_map.h 
b/be/src/exec/common/hash_table/ph_hash_map.h
index 92a6b5f9557..212b59609c9 100644
--- a/be/src/exec/common/hash_table/ph_hash_map.h
+++ b/be/src/exec/common/hash_table/ph_hash_map.h
@@ -188,6 +188,12 @@ public:
         for (auto& v : *this) func(v.get_second());
     }
 
+    /// Call func(const Key &, Mapped &) for each hash map element.
+    template <typename Func>
+    void for_each(Func&& func) {
+        for (auto& v : *this) func(v.get_first(), v.get_second());
+    }
+
     size_t get_buffer_size_in_bytes() const {
         const auto capacity = _hash_map.capacity();
         return capacity * sizeof(typename HashMapImpl::slot_type);
diff --git a/be/src/exec/common/hash_table/string_hash_map.h 
b/be/src/exec/common/hash_table/string_hash_map.h
index c615e6b5dcc..1c1dfce50dd 100644
--- a/be/src/exec/common/hash_table/string_hash_map.h
+++ b/be/src/exec/common/hash_table/string_hash_map.h
@@ -68,7 +68,7 @@ struct StringHashMapCell<doris::StringRef, TMapped>
     using Base::Base;
     static constexpr bool need_zero_value_storage = false;
     // external
-    using Base::get_key;
+    const doris::StringRef& get_key() const { return this->value.first; } /// 
NOLINT
     // internal
     static const doris::StringRef& get_key(const value_type& value_) { return 
value_.first; }
 
@@ -150,6 +150,29 @@ public:
             func(v.get_second());
         }
     }
+
+    template <typename Func>
+    void for_each(Func&& func) {
+        if (this->m0.size()) {
+            func(this->m0.zero_value()->get_key(), 
this->m0.zero_value()->get_second());
+        }
+        for (auto& v : this->m1) {
+            func(v.get_key(), v.get_second());
+        }
+        for (auto& v : this->m2) {
+            func(v.get_key(), v.get_second());
+        }
+        for (auto& v : this->m3) {
+            func(v.get_key(), v.get_second());
+        }
+        for (auto& v : this->m4) {
+            func(v.get_key(), v.get_second());
+        }
+        for (auto& v : this->ms) {
+            func(v.get_key(), v.get_second());
+        }
+    }
+
     template <typename MappedType>
     char* get_null_key_data() {
         return nullptr;
diff --git a/be/src/exec/common/hash_table/string_hash_table.h 
b/be/src/exec/common/hash_table/string_hash_table.h
index 166b56bda29..a110291426f 100644
--- a/be/src/exec/common/hash_table/string_hash_table.h
+++ b/be/src/exec/common/hash_table/string_hash_table.h
@@ -50,7 +50,9 @@ StringKey to_string_key(const doris::StringRef& key) {
 template <typename T>
 inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) {
     assert(n != 0);
-    return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n) 
>> 3)};
+    // __builtin_clzll counts leading zero bits in a 64-bit (8-byte) value,
+    // so we must use 8 here instead of sizeof(T) to get the correct byte 
count.
+    return {reinterpret_cast<const char*>(&n), static_cast<size_t>(8 - 
(__builtin_clzll(n) >> 3))};
 }
 inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
     assert(n.items[1] != 0);
@@ -415,7 +417,7 @@ protected:
             return static_cast<Derived&>(*this);
         }
 
-        auto& operator*() const {
+        auto& operator*() {
             switch (sub_table_index) {
             case 0: {
                 this->cell = *(container->m0.zero_value());
@@ -444,9 +446,13 @@ protected:
             }
             return cell;
         }
-        auto* operator->() const { return &(this->operator*()); }
+        auto* operator->() { return &(this->operator*()); }
 
-        auto get_ptr() const { return &(this->operator*()); }
+        auto get_ptr() { return &(this->operator*()); }
+
+        // Provide get_first()/get_second() at the iterator level, consistent 
with PHHashMap::iterator
+        auto& get_first() { return (**this).get_first(); }
+        auto& get_second() { return (**this).get_second(); }
 
         size_t get_hash() const {
             switch (sub_table_index) {
diff --git a/be/test/exec/hash_map/hash_table_method_test.cpp 
b/be/test/exec/hash_map/hash_table_method_test.cpp
index 60cc2284aa2..697ce283751 100644
--- a/be/test/exec/hash_map/hash_table_method_test.cpp
+++ b/be/test/exec/hash_map/hash_table_method_test.cpp
@@ -20,6 +20,7 @@
 #include <set>
 
 #include "core/data_type/data_type_number.h"
+#include "exec/common/agg_utils.h"
 #include "exec/common/columns_hashing.h"
 #include "exec/common/hash_table/hash.h"
 #include "exec/common/hash_table/hash_map_context.h"
@@ -221,4 +222,619 @@ TEST(HashTableMethodTest, 
testNullableIteratorSkipsNullKey) {
     }
 }
 
+// Helper: create distinguishable AggregateDataPtr values for testing
+static AggregateDataPtr make_mapped(size_t val) {
+    return reinterpret_cast<AggregateDataPtr>(val);
+}
+
+// ========== MethodOneNumber<UInt32, AggData<UInt32>> ==========
+// AggData<UInt32> = PHHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>
+TEST(HashTableMethodTest, testMethodOneNumberAggInsertFindForEach) {
+    MethodOneNumber<UInt32, AggData<UInt32>> method;
+    using State = MethodOneNumber<UInt32, AggData<UInt32>>::State;
+
+    auto col = ColumnHelper::create_column<DataTypeInt32>({10, 20, 30, 40, 
50});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 5;
+
+    // Insert
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 1));
+                    },
+                    [](auto& mapped) { FAIL() << "unexpected null"; });
+        }
+    }
+
+    // Find existing keys
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            auto result = method.find(state, i);
+            ASSERT_TRUE(result.is_found());
+            EXPECT_EQ(result.get_mapped(), make_mapped(i + 1));
+        }
+    }
+
+    // Find non-existing key
+    {
+        auto miss_col = ColumnHelper::create_column<DataTypeInt32>({999});
+        ColumnRawPtrs miss_columns = {miss_col.get()};
+        State state(miss_columns);
+        method.init_serialized_keys(miss_columns, 1);
+        auto result = method.find(state, 0);
+        EXPECT_FALSE(result.is_found());
+    }
+
+    // for_each
+    {
+        size_t count = 0;
+        method.hash_table->for_each([&](const auto& key, auto& mapped) {
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 5);
+    }
+
+    // for_each_mapped
+    {
+        size_t count = 0;
+        method.hash_table->for_each_mapped([&](auto& mapped) {
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 5);
+    }
+}
+
+// ========== MethodOneNumber Phase2 (HashMixWrapper) ==========
+// AggregatedDataWithUInt32KeyPhase2 = PHHashMap<UInt32, AggregateDataPtr, 
HashMixWrapper<UInt32>>
+TEST(HashTableMethodTest, testMethodOneNumberPhase2AggInsertFindForEach) {
+    MethodOneNumber<UInt32, AggregatedDataWithUInt32KeyPhase2> method;
+    using State = MethodOneNumber<UInt32, 
AggregatedDataWithUInt32KeyPhase2>::State;
+
+    auto col = ColumnHelper::create_column<DataTypeInt32>({100, 200, 300});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 3;
+
+    // Insert
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 100));
+                    },
+                    [](auto& mapped) { FAIL(); });
+        }
+    }
+
+    // Find
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            auto result = method.find(state, i);
+            ASSERT_TRUE(result.is_found());
+            EXPECT_EQ(result.get_mapped(), make_mapped(i + 100));
+        }
+    }
+
+    // for_each + for_each_mapped
+    {
+        size_t count = 0;
+        method.hash_table->for_each([&](const auto& key, auto& mapped) { 
count++; });
+        EXPECT_EQ(count, 3);
+    }
+    {
+        size_t count = 0;
+        method.hash_table->for_each_mapped([&](auto& mapped) { count++; });
+        EXPECT_EQ(count, 3);
+    }
+}
+
+// ========== MethodStringNoCache<AggregatedDataWithShortStringKey> ==========
+// AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>
+TEST(HashTableMethodTest, testMethodStringNoCacheAggInsertFindForEach) {
+    MethodStringNoCache<AggregatedDataWithShortStringKey> method;
+    using State = MethodStringNoCache<AggregatedDataWithShortStringKey>::State;
+
+    // Include strings of varying lengths to exercise different StringHashMap 
sub-maps
+    auto col = ColumnHelper::create_column<DataTypeString>(
+            {"hello", "world", "foo", "bar", "longstring_exceeding_16_bytes"});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 5;
+
+    // Insert
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 10));
+                    },
+                    [](auto& mapped) { FAIL(); });
+        }
+    }
+
+    // Find
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            auto result = method.find(state, i);
+            ASSERT_TRUE(result.is_found());
+            EXPECT_EQ(result.get_mapped(), make_mapped(i + 10));
+        }
+    }
+
+    // for_each
+    {
+        size_t count = 0;
+        method.hash_table->for_each([&](const auto& key, auto& mapped) {
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 5);
+    }
+
+    // for_each_mapped
+    {
+        size_t count = 0;
+        method.hash_table->for_each_mapped([&](auto& mapped) { count++; });
+        EXPECT_EQ(count, 5);
+    }
+}
+
+// ========== MethodSerialized<AggregatedDataWithStringKey> ==========
+// AggregatedDataWithStringKey = PHHashMap<StringRef, AggregateDataPtr>
+// StringRef keys require arena persistence to survive across 
init_serialized_keys calls.
+TEST(HashTableMethodTest, testMethodSerializedAggInsertFindForEach) {
+    MethodSerialized<AggregatedDataWithStringKey> method;
+    using State = MethodSerialized<AggregatedDataWithStringKey>::State;
+
+    auto col1 = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3});
+    auto col2 = ColumnHelper::create_column<DataTypeString>({"a", "bb", 
"ccc"});
+    ColumnRawPtrs key_columns = {col1.get(), col2.get()};
+    const size_t rows = 3;
+
+    // Use a separate arena to persist StringRef key data
+    Arena persist_arena;
+
+    // Insert
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        method.try_presis_key_and_origin(key, origin, 
persist_arena);
+                        ctor(key, make_mapped(i + 50));
+                    },
+                    [](auto& mapped) { FAIL(); });
+        }
+    }
+
+    // for_each (keys backed by persist_arena)
+    {
+        size_t count = 0;
+        method.hash_table->for_each([&](const auto& key, auto& mapped) {
+            EXPECT_GT(key.size, 0);
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 3);
+    }
+
+    // for_each_mapped
+    {
+        size_t count = 0;
+        method.hash_table->for_each_mapped([&](auto& mapped) {
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 3);
+    }
+
+    // Find (re-init serialized keys for lookup)
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            auto result = method.find(state, i);
+            ASSERT_TRUE(result.is_found());
+            EXPECT_EQ(result.get_mapped(), make_mapped(i + 50));
+        }
+    }
+}
+
+// ========== MethodKeysFixed<AggData<UInt64>> ==========
+// Fixed-width multi-column keys packed into UInt64
+TEST(HashTableMethodTest, testMethodKeysFixedAggInsertFindForEach) {
+    MethodKeysFixed<AggData<UInt64>> method(Sizes {sizeof(int32_t), 
sizeof(int32_t)});
+    using State = MethodKeysFixed<AggData<UInt64>>::State;
+
+    auto col1 = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3, 4});
+    auto col2 = ColumnHelper::create_column<DataTypeInt32>({10, 20, 30, 40});
+    ColumnRawPtrs key_columns = {col1.get(), col2.get()};
+    const size_t rows = 4;
+
+    // Insert
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 200));
+                    },
+                    [](auto& mapped) { FAIL(); });
+        }
+    }
+
+    // Find
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            auto result = method.find(state, i);
+            ASSERT_TRUE(result.is_found());
+            EXPECT_EQ(result.get_mapped(), make_mapped(i + 200));
+        }
+    }
+
+    // for_each
+    {
+        size_t count = 0;
+        method.hash_table->for_each([&](const auto& key, auto& mapped) {
+            EXPECT_NE(mapped, nullptr);
+            count++;
+        });
+        EXPECT_EQ(count, 4);
+    }
+}
+
+// ========== Nullable MethodOneNumber (MethodSingleNullableColumn + 
MethodOneNumber) ==========
+// AggDataNullable<UInt32> = DataWithNullKey<PHHashMap<UInt32, 
AggregateDataPtr, HashCRC32<UInt32>>>
+// Tests null key insertion, find, and for_each (which excludes null from 
PHHashMap::for_each).
+TEST(HashTableMethodTest, testNullableMethodOneNumberAggInsertFindForEach) {
+    using NullableMethod =
+            MethodSingleNullableColumn<MethodOneNumber<UInt32, 
AggDataNullable<UInt32>>>;
+    NullableMethod method;
+    using State = NullableMethod::State;
+
+    // values: {10, 20, 30, 40, 50}, null at rows 1 and 3
+    auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({10, 20, 
30, 40, 50},
+                                                                   {0, 1, 0, 
1, 0});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 5;
+
+    // Insert
+    size_t null_create_count = 0;
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 1));
+                    },
+                    [&](auto& mapped) {
+                        null_create_count++;
+                        mapped = make_mapped(999);
+                    });
+        }
+    }
+
+    // null_creator called once for first null row (index 1); second null 
(index 3) is deduplicated
+    EXPECT_EQ(null_create_count, 1);
+
+    auto& ht = *method.hash_table;
+    EXPECT_TRUE(ht.has_null_key_data());
+    EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(999));
+
+    // Find
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+
+        // row 0: key=10, non-null
+        auto r0 = method.find(state, 0);
+        ASSERT_TRUE(r0.is_found());
+        EXPECT_EQ(r0.get_mapped(), make_mapped(1));
+
+        // row 1: null → returns null key data
+        auto r1 = method.find(state, 1);
+        ASSERT_TRUE(r1.is_found());
+        EXPECT_EQ(r1.get_mapped(), make_mapped(999));
+
+        // row 2: key=30, non-null
+        auto r2 = method.find(state, 2);
+        ASSERT_TRUE(r2.is_found());
+        EXPECT_EQ(r2.get_mapped(), make_mapped(3));
+
+        // row 4: key=50, non-null
+        auto r4 = method.find(state, 4);
+        ASSERT_TRUE(r4.is_found());
+        EXPECT_EQ(r4.get_mapped(), make_mapped(5));
+    }
+
+    // for_each_mapped: PHHashMap::for_each_mapped iterates only non-null keys
+    {
+        size_t count = 0;
+        ht.for_each_mapped([&](auto& mapped) { count++; });
+        EXPECT_EQ(count, 3); // keys 10, 30, 50
+    }
+
+    // DataWithNullKey::size() includes null key
+    EXPECT_EQ(ht.size(), 4); // 3 non-null + 1 null
+}
+
+// ========== Nullable MethodStringNoCache ==========
+// AggregatedDataWithNullableShortStringKey = 
DataWithNullKey<StringHashMap<AggregateDataPtr>>
+TEST(HashTableMethodTest, testNullableMethodStringNoCacheAggInsertFindForEach) 
{
+    using NullableMethod = MethodSingleNullableColumn<
+            MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>;
+    NullableMethod method;
+    using State = NullableMethod::State;
+
+    // values: {"hello", <null>, "world", <null>}
+    auto col = ColumnHelper::create_nullable_column<DataTypeString>({"hello", 
"", "world", ""},
+                                                                    {0, 1, 0, 
1});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 4;
+
+    // Insert
+    size_t null_create_count = 0;
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+        for (size_t i = 0; i < rows; i++) {
+            method.lazy_emplace(
+                    state, i,
+                    [&](const auto& ctor, auto& key, auto& origin) {
+                        ctor(key, make_mapped(i + 1));
+                    },
+                    [&](auto& mapped) {
+                        null_create_count++;
+                        mapped = make_mapped(888);
+                    });
+        }
+    }
+
+    EXPECT_EQ(null_create_count, 1);
+
+    auto& ht = *method.hash_table;
+    EXPECT_TRUE(ht.has_null_key_data());
+    EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(888));
+
+    // Find
+    {
+        State state(key_columns);
+        method.init_serialized_keys(key_columns, rows);
+
+        // row 0: "hello"
+        auto r0 = method.find(state, 0);
+        ASSERT_TRUE(r0.is_found());
+        EXPECT_EQ(r0.get_mapped(), make_mapped(1));
+
+        // row 1: null
+        auto r1 = method.find(state, 1);
+        ASSERT_TRUE(r1.is_found());
+        EXPECT_EQ(r1.get_mapped(), make_mapped(888));
+
+        // row 2: "world"
+        auto r2 = method.find(state, 2);
+        ASSERT_TRUE(r2.is_found());
+        EXPECT_EQ(r2.get_mapped(), make_mapped(3));
+    }
+
+    // for_each: StringHashMap::for_each iterates only non-null keys
+    {
+        size_t count = 0;
+        ht.for_each([&](const auto& key, auto& mapped) { count++; });
+        EXPECT_EQ(count, 2); // "hello" and "world"
+    }
+
+    // DataWithNullKey::size() includes null key
+    EXPECT_EQ(ht.size(), 3); // 2 non-null + 1 null
+}
+
+// ========== PHHashMap iterator: traverse, sort, verify, assignment ==========
+TEST(HashTableMethodTest, testPHHashMapIterator) {
+    MethodOneNumber<UInt32, AggData<UInt32>> method;
+    using State = MethodOneNumber<UInt32, AggData<UInt32>>::State;
+
+    auto col = ColumnHelper::create_column<DataTypeInt32>({50, 10, 40, 20, 
30});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 5;
+
+    State state(key_columns);
+    method.init_serialized_keys(key_columns, rows);
+    for (size_t i = 0; i < rows; i++) {
+        method.lazy_emplace(
+                state, i,
+                [&](const auto& ctor, auto& key, auto& origin) { ctor(key, 
make_mapped(i + 1)); },
+                [](auto& mapped) { FAIL(); });
+    }
+
+    auto& ht = *method.hash_table;
+
+    // Collect all (key, mapped) pairs via iterator
+    std::vector<std::pair<UInt32, AggregateDataPtr>> entries;
+    for (auto it = ht.begin(); it != ht.end(); ++it) {
+        entries.emplace_back(it->get_first(), it->get_second());
+    }
+    ASSERT_EQ(entries.size(), 5);
+
+    // Sort by key and verify
+    std::sort(entries.begin(), entries.end(),
+              [](const auto& a, const auto& b) { return a.first < b.first; });
+    // Inserted: {50→1, 10→2, 40→3, 20→4, 30→5}
+    EXPECT_EQ(entries[0].first, 10);
+    EXPECT_EQ(entries[0].second, make_mapped(2));
+    EXPECT_EQ(entries[1].first, 20);
+    EXPECT_EQ(entries[1].second, make_mapped(4));
+    EXPECT_EQ(entries[2].first, 30);
+    EXPECT_EQ(entries[2].second, make_mapped(5));
+    EXPECT_EQ(entries[3].first, 40);
+    EXPECT_EQ(entries[3].second, make_mapped(3));
+    EXPECT_EQ(entries[4].first, 50);
+    EXPECT_EQ(entries[4].second, make_mapped(1));
+
+    // Iterator assignment: it = begin(), it2 = it
+    auto it = ht.begin();
+    auto it2 = it; // copy
+    EXPECT_TRUE(it == it2);
+    EXPECT_EQ(it->get_first(), it2->get_first());
+    EXPECT_EQ(it->get_second(), it2->get_second());
+
+    ++it;
+    EXPECT_TRUE(it != it2); // diverged after increment
+
+    it2 = it; // reassignment
+    EXPECT_TRUE(it == it2);
+    EXPECT_EQ(it->get_first(), it2->get_first());
+
+    // Empty hash table: begin == end
+    MethodOneNumber<UInt32, AggData<UInt32>> empty_method;
+    EXPECT_TRUE(empty_method.hash_table->begin() == 
empty_method.hash_table->end());
+}
+
+// ========== StringHashMap iterator: traverse, sort, verify, assignment 
==========
+TEST(HashTableMethodTest, testStringHashMapIterator) {
+    MethodStringNoCache<AggregatedDataWithShortStringKey> method;
+    using State = MethodStringNoCache<AggregatedDataWithShortStringKey>::State;
+
+    // Different lengths to hit different sub-maps (m1: <=8, m2: <=16, m3: 
<=24, ms: >24)
+    auto col = ColumnHelper::create_column<DataTypeString>(
+            {"z", "ab", "hello_world_12345", "tiny", 
"a_very_long_string_over_24_chars"});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 5;
+
+    State state(key_columns);
+    method.init_serialized_keys(key_columns, rows);
+    for (size_t i = 0; i < rows; i++) {
+        method.lazy_emplace(
+                state, i,
+                [&](const auto& ctor, auto& key, auto& origin) { ctor(key, 
make_mapped(i + 1)); },
+                [](auto& mapped) { FAIL(); });
+    }
+
+    auto& ht = *method.hash_table;
+
+    // Collect all (key_string, mapped) pairs via iterator
+    std::vector<std::pair<std::string, AggregateDataPtr>> entries;
+    for (auto it = ht.begin(); it != ht.end(); ++it) {
+        auto key = it.get_first();
+        entries.emplace_back(std::string(key.data, key.size), it.get_second());
+    }
+    ASSERT_EQ(entries.size(), 5);
+
+    // Sort by key string and verify
+    std::sort(entries.begin(), entries.end(),
+              [](const auto& a, const auto& b) { return a.first < b.first; });
+    // Sorted: "a_very_long...", "ab", "hello_world_12345", "tiny", "z"
+    EXPECT_EQ(entries[0].first, "a_very_long_string_over_24_chars");
+    EXPECT_EQ(entries[0].second, make_mapped(5));
+    EXPECT_EQ(entries[1].first, "ab");
+    EXPECT_EQ(entries[1].second, make_mapped(2));
+    EXPECT_EQ(entries[2].first, "hello_world_12345");
+    EXPECT_EQ(entries[2].second, make_mapped(3));
+    EXPECT_EQ(entries[3].first, "tiny");
+    EXPECT_EQ(entries[3].second, make_mapped(4));
+    EXPECT_EQ(entries[4].first, "z");
+    EXPECT_EQ(entries[4].second, make_mapped(1));
+
+    // Iterator assignment: copy and reassign
+    auto it = ht.begin();
+    auto it2 = it;
+    EXPECT_TRUE(it == it2);
+
+    auto key1 = it.get_first();
+    auto key2 = it2.get_first();
+    EXPECT_EQ(key1, key2);
+
+    ++it;
+    EXPECT_TRUE(it != it2);
+
+    it2 = it; // reassignment
+    EXPECT_TRUE(it == it2);
+
+    // Empty StringHashMap: begin == end
+    MethodStringNoCache<AggregatedDataWithShortStringKey> empty_method;
+    EXPECT_TRUE(empty_method.hash_table->begin() == 
empty_method.hash_table->end());
+}
+
+// ========== DataWithNullKey iterator: only non-null entries, null key 
accessed separately ==========
+TEST(HashTableMethodTest, testDataWithNullKeyIterator) {
+    using NullableMethod =
+            MethodSingleNullableColumn<MethodOneNumber<UInt32, 
AggDataNullable<UInt32>>>;
+    NullableMethod method;
+    using State = NullableMethod::State;
+
+    // values: {10, 20, 30}, null at row 1
+    auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({10, 20, 
30}, {0, 1, 0});
+    ColumnRawPtrs key_columns = {col.get()};
+    const size_t rows = 3;
+
+    State state(key_columns);
+    method.init_serialized_keys(key_columns, rows);
+    for (size_t i = 0; i < rows; i++) {
+        method.lazy_emplace(
+                state, i,
+                [&](const auto& ctor, auto& key, auto& origin) { ctor(key, 
make_mapped(i + 1)); },
+                [&](auto& mapped) { mapped = make_mapped(999); });
+    }
+
+    auto& ht = *method.hash_table;
+
+    // Null key is present and must be accessed separately (not via iterator)
+    EXPECT_TRUE(ht.has_null_key_data());
+    EXPECT_EQ(ht.get_null_key_data<AggregateDataPtr>(), make_mapped(999));
+
+    // DataWithNullKey::size() includes null key
+    EXPECT_EQ(ht.size(), 3); // 2 non-null + 1 null
+
+    // Iterator only visits non-null entries
+    std::vector<std::pair<UInt32, AggregateDataPtr>> non_null_entries;
+    for (auto it = ht.begin(); it != ht.end(); ++it) {
+        non_null_entries.emplace_back(it->get_first(), it->get_second());
+    }
+
+    // Only 2 non-null entries in iteration (null key excluded)
+    ASSERT_EQ(non_null_entries.size(), 2);
+    std::sort(non_null_entries.begin(), non_null_entries.end(),
+              [](const auto& a, const auto& b) { return a.first < b.first; });
+    // Inserted: row0=10→1, row2=30→3
+    EXPECT_EQ(non_null_entries[0].first, 10);
+    EXPECT_EQ(non_null_entries[0].second, make_mapped(1));
+    EXPECT_EQ(non_null_entries[1].first, 30);
+    EXPECT_EQ(non_null_entries[1].second, make_mapped(3));
+
+    // Iterator assignment
+    auto it = ht.begin();
+    auto it2 = it;
+    EXPECT_TRUE(it == it2);
+    EXPECT_EQ(it.get_second(), it2.get_second());
+
+    ++it;
+    EXPECT_TRUE(it != it2);
+
+    it2 = it;
+    EXPECT_TRUE(it == it2);
+}
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to