This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ca983a83cab [Improvement] add zlib_crc32_fixed (#60735)
ca983a83cab is described below

commit ca983a83cabfeaf063a55396ecabf9f804559d34
Author: Pxl <[email protected]>
AuthorDate: Wed Feb 25 14:36:57 2026 +0800

    [Improvement] add zlib_crc32_fixed (#60735)
    
    zlib_crc32: SplitBlockHashComputeTime: 269.15ms
    crc32c_fixed: SplitBlockHashComputeTime: 33.258ms
    zlib_crc32_fixed: SplitBlockHashComputeTime: 61.941ms
---
 be/src/util/hash_util.hpp             |  75 ++++++-
 be/src/vec/columns/column_decimal.cpp |  11 +-
 be/src/vec/columns/column_vector.cpp  |  46 ++--
 be/src/vec/columns/column_vector.h    |   1 +
 be/test/util/crc32c_test.cpp          | 401 ++++++++++++++++++++++++++++++++++
 5 files changed, 500 insertions(+), 34 deletions(-)

diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index 9c5d4ef3aca..9371f8867ca 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -34,20 +34,93 @@
 #include "util/hash/city.h"
 #include "util/murmur_hash3.h"
 #include "util/sse_util.hpp"
+#include "vec/common/endian.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
+namespace detail {
+// Slicing-by-4 table: t[0] is the standard byte-at-a-time table,
+// t[1..3] are extended tables for parallel 4-byte processing.
+struct CRC32SliceBy4Table {
+    uint32_t t[4][256] {};
+    constexpr CRC32SliceBy4Table() {
+        // t[0]: standard CRC32 lookup table
+        for (uint32_t i = 0; i < 256; i++) {
+            uint32_t c = i;
+            for (int j = 0; j < 8; j++) {
+                c = (c & 1) ? ((c >> 1) ^ 0xEDB88320U) : (c >> 1);
+            }
+            t[0][i] = c;
+        }
+        // t[1..3]: each entry is one additional CRC byte-step applied to t[k-1]
+        for (uint32_t i = 0; i < 256; i++) {
+            uint32_t c = t[0][i];
+            for (int k = 1; k < 4; k++) {
+                c = t[0][c & 0xFF] ^ (c >> 8);
+                t[k][i] = c;
+            }
+        }
+    }
+};
+} // namespace detail
+
 // Utility class to compute hash values.
 class HashUtil {
+private:
+    static inline constexpr detail::CRC32SliceBy4Table CRC32_TABLE {};
+
 public:
     static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t 
hash) {
         return (uint32_t)crc32(hash, (const unsigned char*)data, bytes);
     }
 
+    // Inline CRC32 (zlib-compatible, standard CRC32 polynomial) for fixed-size types.
+    // Uses Slicing-by-4 technique for 4/8-byte types: processes 4 bytes at a time using
+    // 4 precomputed lookup tables, reducing serial table lookups from 4 to 1 per 4-byte chunk.
+    // Polynomial: 0xEDB88320 (reflected form of 0x04C11DB7).
+    // Endian note: CRC32 reflected algorithm processes bytes in address order (byte[0] first).
+    // Slicing-by-4 requires byte[0] at LSB of the loaded uint32_t, which is little-endian layout.
+    // LittleEndian::Load32 provides this on ALL platforms: noop on LE, bswap on BE.
+    template <typename T>
+    static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) {
+        const auto* p = reinterpret_cast<const uint8_t*>(&value);
+        // zlib convention: pre/post XOR with 0xFFFFFFFF
+        uint32_t crc = hash ^ 0xFFFFFFFFU;
+
+        if constexpr (sizeof(T) == 1) {
+            // 1 byte: single table lookup
+            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
+        } else if constexpr (sizeof(T) == 2) {
+            // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes)
+            crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8);
+            crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8);
+        } else if constexpr (sizeof(T) == 4) {
+            // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel
+            // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian,
+            // ensuring byte[0] is always at LSB for correct CRC byte processing order.
+            uint32_t word = LittleEndian::Load32(p) ^ crc;
+            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) 
& 0xFF] ^
+                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ 
CRC32_TABLE.t[0][(word >> 24) & 0xFF];
+        } else if constexpr (sizeof(T) == 8) {
+            // 8 bytes: two Slicing-by-4 steps
+            uint32_t word = LittleEndian::Load32(p) ^ crc;
+            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) 
& 0xFF] ^
+                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ 
CRC32_TABLE.t[0][(word >> 24) & 0xFF];
+
+            word = LittleEndian::Load32(p + 4) ^ crc;
+            crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) 
& 0xFF] ^
+                  CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ 
CRC32_TABLE.t[0][(word >> 24) & 0xFF];
+        } else {
+            // Fallback to zlib for larger/unusual types
+            return (uint32_t)crc32(hash, (const unsigned char*)&value, 
sizeof(T));
+        }
+        return crc ^ 0xFFFFFFFFU;
+    }
+
     static uint32_t zlib_crc_hash_null(uint32_t hash) {
         // null is treat as 0 when hash
         static const int INT_VALUE = 0;
-        return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
+        return zlib_crc32_fixed(INT_VALUE, hash);
     }
 
     template <typename T>
diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp
index 94afcc6aa38..90d126f4911 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -170,7 +170,7 @@ void ColumnDecimal<T>::update_crc_with_value(size_t start, size_t end, uint32_t&
     if (null_data == nullptr) {
         for (size_t i = start; i < end; i++) {
             if constexpr (T != TYPE_DECIMALV2) {
-                hash = HashUtil::zlib_crc_hash(&data[i], sizeof(value_type), 
hash);
+                hash = HashUtil::zlib_crc32_fixed(data[i], hash);
             } else {
                 decimalv2_do_crc(i, hash);
             }
@@ -179,7 +179,7 @@ void ColumnDecimal<T>::update_crc_with_value(size_t start, size_t end, uint32_t&
         for (size_t i = start; i < end; i++) {
             if (null_data[i] == 0) {
                 if constexpr (T != TYPE_DECIMALV2) {
-                    hash = HashUtil::zlib_crc_hash(&data[i], 
sizeof(value_type), hash);
+                    hash = HashUtil::zlib_crc32_fixed(data[i], hash);
                 } else {
                     decimalv2_do_crc(i, hash);
                 }
@@ -198,12 +198,13 @@ void ColumnDecimal<T>::update_crcs_with_value(uint32_t* __restrict hashes, Primi
     if constexpr (T != TYPE_DECIMALV2) {
         if (null_data == nullptr) {
             for (size_t i = 0; i < s; i++) {
-                hashes[i] = HashUtil::zlib_crc_hash(&data[i], 
sizeof(value_type), hashes[i]);
+                hashes[i] = HashUtil::zlib_crc32_fixed(data[i], hashes[i]);
             }
         } else {
             for (size_t i = 0; i < s; i++) {
-                if (null_data[i] == 0)
-                    hashes[i] = HashUtil::zlib_crc_hash(&data[i], 
sizeof(value_type), hashes[i]);
+                if (null_data[i] == 0) {
+                    hashes[i] = HashUtil::zlib_crc32_fixed(data[i], hashes[i]);
+                }
             }
         }
     } else {
diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp
index 21d7889e825..a34390836cb 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -201,41 +201,31 @@ void ColumnVector<T>::update_crcs_with_value(uint32_t* __restrict hashes, Primit
     auto s = rows;
     DCHECK(s == size());
 
-    if constexpr (is_date_or_datetime(T)) {
-        char buf[64];
-        auto date_convert_do_crc = [&](size_t i) {
-            const auto& date_val = (const VecDateTimeValue&)data[i];
-            auto len = date_val.to_buffer(buf);
-            hashes[i] = HashUtil::zlib_crc_hash(buf, len, hashes[i]);
-        };
-
-        if (null_data == nullptr) {
-            for (size_t i = 0; i < s; i++) {
-                date_convert_do_crc(i);
-            }
-        } else {
-            for (size_t i = 0; i < s; i++) {
-                if (null_data[i] == 0) {
-                    date_convert_do_crc(i);
-                }
-            }
+    if (null_data == nullptr) {
+        for (size_t i = 0; i < s; i++) {
+            hashes[i] = _zlib_crc32_hash(hashes[i], i);
         }
     } else {
-        if (null_data == nullptr) {
-            for (size_t i = 0; i < s; i++) {
-                hashes[i] = HashUtil::zlib_crc_hash(
-                        &data[i], sizeof(typename 
PrimitiveTypeTraits<T>::CppType), hashes[i]);
-            }
-        } else {
-            for (size_t i = 0; i < s; i++) {
-                if (null_data[i] == 0)
-                    hashes[i] = HashUtil::zlib_crc_hash(
-                            &data[i], sizeof(typename 
PrimitiveTypeTraits<T>::CppType), hashes[i]);
+        for (size_t i = 0; i < s; i++) {
+            if (null_data[i] == 0) {
+                hashes[i] = _zlib_crc32_hash(hashes[i], i);
             }
         }
     }
 }
 
+template <PrimitiveType T>
+uint32_t ColumnVector<T>::_zlib_crc32_hash(uint32_t hash, size_t idx) const {
+    if constexpr (is_date_or_datetime(T)) {
+        char buf[64];
+        const auto& date_val = (const VecDateTimeValue&)data[idx];
+        auto len = date_val.to_buffer(buf);
+        return HashUtil::zlib_crc_hash(buf, len, hash);
+    } else {
+        return HashUtil::zlib_crc32_fixed(data[idx], hash);
+    }
+}
+
 template <PrimitiveType T>
 uint32_t ColumnVector<T>::_crc32c_hash(uint32_t hash, size_t idx) const {
     if constexpr (is_date_or_datetime(T)) {
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index fb3141d6485..9e97eeae6b5 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -405,6 +405,7 @@ public:
     }
 
 protected:
+    uint32_t _zlib_crc32_hash(uint32_t hash, size_t idx) const;
     uint32_t _crc32c_hash(uint32_t hash, size_t idx) const;
     Container data;
 };
diff --git a/be/test/util/crc32c_test.cpp b/be/test/util/crc32c_test.cpp
index 5a6a7faa3a5..e9795882b36 100644
--- a/be/test/util/crc32c_test.cpp
+++ b/be/test/util/crc32c_test.cpp
@@ -22,10 +22,14 @@
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
 #include <string.h>
+#include <zlib.h>
 
+#include <cstdint>
+#include <limits>
 #include <vector>
 
 #include "gtest/gtest_pred_impl.h"
+#include "util/hash_util.hpp"
 #include "util/slice.h"
 
 namespace doris {
@@ -75,3 +79,400 @@ TEST(CRC, Extend) {
 }
 
 } // namespace doris
+
+namespace doris {
+
+// Helper: compute crc32c via crc32c::Crc32c for a value of type T
+template <typename T>
+uint32_t crc32c_reference(const T& value, uint32_t seed) {
+    return crc32c::Extend(seed, reinterpret_cast<const uint8_t*>(&value), 
sizeof(T));
+}
+
+// Helper: compute zlib crc32 for a value of type T
+template <typename T>
+uint32_t zlib_crc32_reference(const T& value, uint32_t seed) {
+    return HashUtil::zlib_crc_hash(&value, sizeof(T), seed);
+}
+
+/*
+todo: fix those cases when we have a new release version; do not consider the compatibility issue
+use following code to replace the old crc32c_fixed function in hash_util.hpp
+template <typename T>
+static uint32_t crc32c_fixed(const T& value, uint32_t hash) {
+    uint32_t crc = hash ^ 0xFFFFFFFFU;
+    if constexpr (sizeof(T) == 1) {
+        crc = _mm_crc32_u8(crc, *reinterpret_cast<const uint8_t*>(&value));
+    } else if constexpr (sizeof(T) == 2) {
+        crc = _mm_crc32_u16(crc, *reinterpret_cast<const uint16_t*>(&value));
+    } else if constexpr (sizeof(T) == 4) {
+        crc = _mm_crc32_u32(crc, *reinterpret_cast<const uint32_t*>(&value));
+    } else if constexpr (sizeof(T) == 8) {
+        crc = (uint32_t)_mm_crc32_u64(crc, *reinterpret_cast<const 
uint64_t*>(&value));
+    } else {
+        return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T));
+    }
+    return crc ^ 0xFFFFFFFFU;
+}
+// ==================== crc32c_fixed tests ====================
+TEST(CRC32CFixed, Uint8Values) {
+    uint8_t values[] = {0, 1, 127, 128, 255};
+    for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "uint8_t v=" << (int)v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, Uint16Values) {
+    uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535};
+    for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0x12345678U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "uint16_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, Int32Values) {
+    int32_t values[] = {0,
+                        1,
+                        -1,
+                        42,
+                        -42,
+                        1000000,
+                        -1000000,
+                        std::numeric_limits<int32_t>::min(),
+                        std::numeric_limits<int32_t>::max()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xCAFEBABEU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "int32_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, Uint32Values) {
+    uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 
0x12345678};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "uint32_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, Int64Values) {
+    int64_t values[] = {0,
+                        1,
+                        -1,
+                        1000000000LL,
+                        -1000000000LL,
+                        std::numeric_limits<int64_t>::min(),
+                        std::numeric_limits<int64_t>::max(),
+                        0x0102030405060708LL,
+                        -0x0102030405060708LL};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "int64_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, Uint64Values) {
+    uint64_t values[] = {0,
+                         1,
+                         0xFFFFFFFFFFFFFFFFULL,
+                         0xDEADBEEFCAFEBABEULL,
+                         0x0123456789ABCDEFULL,
+                         0xFF00FF00FF00FF00ULL};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "uint64_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, FloatValues) {
+    float values[] = {0.0f,
+                      -0.0f,
+                      1.0f,
+                      -1.0f,
+                      3.14f,
+                      std::numeric_limits<float>::min(),
+                      std::numeric_limits<float>::max(),
+                      std::numeric_limits<float>::infinity()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "float v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, DoubleValues) {
+    double values[] = {0.0,
+                       -0.0,
+                       1.0,
+                       -1.0,
+                       3.141592653589793,
+                       1e100,
+                       -1e100,
+                       std::numeric_limits<double>::infinity()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, 
seed))
+                    << "double v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(CRC32CFixed, NullHash) {
+    // crc32c_null should match crc32c_fixed with int(0)
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
+        int zero = 0;
+        EXPECT_EQ(HashUtil::crc32c_null(seed), HashUtil::crc32c_fixed(zero, 
seed));
+        EXPECT_EQ(HashUtil::crc32c_null(seed), crc32c_reference(zero, seed));
+    }
+}
+*/
+// ==================== zlib_crc32_fixed tests ====================
+
+TEST(ZlibCRC32Fixed, Uint8Values) {
+    uint8_t values[] = {0, 1, 42, 127, 128, 255};
+    for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "uint8_t v=" << (int)v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Int16Values) {
+    int16_t values[] = {0, 1, -1, 256, -256, 32767, -32768};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x12345678U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "int16_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Uint16Values) {
+    uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCDEF00U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "uint16_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Int32Values) {
+    int32_t values[] = {0,
+                        1,
+                        -1,
+                        42,
+                        -42,
+                        1000000,
+                        -1000000,
+                        std::numeric_limits<int32_t>::min(),
+                        std::numeric_limits<int32_t>::max()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xCAFEBABEU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "int32_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Uint32Values) {
+    uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 
0x12345678};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "uint32_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Int64Values) {
+    int64_t values[] = {0,
+                        1,
+                        -1,
+                        1000000000LL,
+                        -1000000000LL,
+                        std::numeric_limits<int64_t>::min(),
+                        std::numeric_limits<int64_t>::max(),
+                        0x0102030405060708LL,
+                        -0x0102030405060708LL};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "int64_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, Uint64Values) {
+    uint64_t values[] = {0,
+                         1,
+                         0xFFFFFFFFFFFFFFFFULL,
+                         0xDEADBEEFCAFEBABEULL,
+                         0x0123456789ABCDEFULL,
+                         0xFF00FF00FF00FF00ULL};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "uint64_t v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, FloatValues) {
+    float values[] = {0.0f,
+                      -0.0f,
+                      1.0f,
+                      -1.0f,
+                      3.14f,
+                      1e10f,
+                      -1e10f,
+                      std::numeric_limits<float>::min(),
+                      std::numeric_limits<float>::max(),
+                      std::numeric_limits<float>::infinity()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "float v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, DoubleValues) {
+    double values[] = {0.0,
+                       -0.0,
+                       1.0,
+                       -1.0,
+                       3.141592653589793,
+                       1e100,
+                       -1e100,
+                       1e-300,
+                       std::numeric_limits<double>::infinity()};
+    for (uint32_t seed : {0U, 0xFFFFFFFFU}) {
+        for (auto v : values) {
+            EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), 
zlib_crc32_reference(v, seed))
+                    << "double v=" << v << " seed=" << seed;
+        }
+    }
+}
+
+TEST(ZlibCRC32Fixed, NullHash) {
+    // zlib_crc_hash_null should match zlib_crc32_fixed with int(0)
+    for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) {
+        int zero = 0;
+        EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), 
HashUtil::zlib_crc32_fixed(zero, seed));
+        EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), 
zlib_crc32_reference(zero, seed));
+    }
+}
+
+// ==================== Cross-validation: fixed vs non-fixed should differ ====================
+
+TEST(CRC32Fixed, CRC32CVsZlibDiffer) {
+    // CRC32C and standard CRC32 use different polynomials, so results should differ
+    // (except possibly by coincidence on some values, but not systematically)
+    int32_t v = 12345678;
+    uint32_t seed = 0;
+    uint32_t crc32c_result = HashUtil::crc32c_fixed(v, seed);
+    uint32_t zlib_result = HashUtil::zlib_crc32_fixed(v, seed);
+    EXPECT_NE(crc32c_result, zlib_result)
+            << "CRC32C and zlib CRC32 should produce different results for 
non-trivial input";
+}
+
+// ==================== Chaining: verify incremental hashing ====================
+/*
+TEST(CRC32CFixed, IncrementalChaining) {
+    // Hash two int32 values incrementally and compare with hashing 8 bytes at once
+    int32_t a = 0x11223344;
+    int32_t b = 0x55667788;
+    uint32_t seed = 0;
+
+    uint32_t chained = HashUtil::crc32c_fixed(a, seed);
+    chained = HashUtil::crc32c_fixed(b, chained);
+
+    // Reference: hash the 8 bytes sequentially via crc32c::Extend
+    uint8_t buf[8];
+    memcpy(buf, &a, 4);
+    memcpy(buf + 4, &b, 4);
+    uint32_t reference = crc32c::Extend(seed, buf, 8);
+
+    EXPECT_EQ(chained, reference);
+}
+*/
+TEST(ZlibCRC32Fixed, IncrementalChaining) {
+    // Hash two int32 values incrementally and compare with hashing 8 bytes at once
+    int32_t a = 0x11223344;
+    int32_t b = 0x55667788;
+    uint32_t seed = 0;
+
+    uint32_t chained = HashUtil::zlib_crc32_fixed(a, seed);
+    chained = HashUtil::zlib_crc32_fixed(b, chained);
+
+    // Reference: hash the 8 bytes sequentially via zlib crc32
+    uint8_t buf[8];
+    memcpy(buf, &a, 4);
+    memcpy(buf + 4, &b, 4);
+    uint32_t reference = (uint32_t)crc32(seed, buf, 8);
+
+    EXPECT_EQ(chained, reference);
+}
+/*
+// ==================== Exhaustive 1-byte test ====================
+
+TEST(CRC32CFixed, AllByteValues) {
+    for (int i = 0; i <= 255; i++) {
+        uint8_t v = static_cast<uint8_t>(i);
+        uint32_t seed = 0x12345678U;
+        EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) 
<< "byte=" << i;
+    }
+}
+
+TEST(ZlibCRC32Fixed, AllByteValues) {
+    for (int i = 0; i <= 255; i++) {
+        uint8_t v = static_cast<uint8_t>(i);
+        uint32_t seed = 0x12345678U;
+        EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, 
seed))
+                << "byte=" << i;
+    }
+}
+
+// ==================== Sequential pattern ====================
+
+TEST(CRC32CFixed, SequentialInt32) {
+    // Hash a sequence of increasing int32 values, verify each against reference
+    uint32_t seed = 0;
+    for (int32_t i = -500; i <= 500; i++) {
+        EXPECT_EQ(HashUtil::crc32c_fixed(i, seed), crc32c_reference(i, seed)) 
<< "i=" << i;
+    }
+}
+
+TEST(ZlibCRC32Fixed, SequentialInt32) {
+    uint32_t seed = 0;
+    for (int32_t i = -500; i <= 500; i++) {
+        EXPECT_EQ(HashUtil::zlib_crc32_fixed(i, seed), zlib_crc32_reference(i, 
seed)) << "i=" << i;
+    }
+}
+*/
+// ==================== Large 16-byte type fallback test ====================
+
+TEST(ZlibCRC32Fixed, LargeTypeFallback) {
+    // __int128 is 16 bytes, should hit the fallback path to zlib crc32()
+    __int128 value = static_cast<__int128>(0x0102030405060708ULL) << 64 | 
0x090A0B0C0D0E0F10ULL;
+    uint32_t seed = 0;
+    uint32_t fixed_result = HashUtil::zlib_crc32_fixed(value, seed);
+    uint32_t ref_result = HashUtil::zlib_crc_hash(&value, sizeof(value), seed);
+    EXPECT_EQ(fixed_result, ref_result);
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to