This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 6e5976c  feat: support decimal literal and refactor transform 
utilities (#238)
6e5976c is described below

commit 6e5976c7fbc630507e4aa3706e55382aaa4d5267
Author: Junwang Zhao <[email protected]>
AuthorDate: Sun Oct 19 12:56:53 2025 +0800

    feat: support decimal literal and refactor transform utilities (#238)
---
 src/iceberg/CMakeLists.txt             |   3 +
 src/iceberg/expression/literal.cc      |  26 +-
 src/iceberg/expression/literal.h       |  17 +-
 src/iceberg/manifest_adapter.cc        |   5 +-
 src/iceberg/meson.build                |   3 +
 src/iceberg/test/CMakeLists.txt        |   2 +
 src/iceberg/test/bucket_util_test.cc   |  81 ++++
 src/iceberg/test/decimal_test.cc       |  98 +++++
 src/iceberg/test/literal_test.cc       |  54 +++
 src/iceberg/test/meson.build           |   2 +
 src/iceberg/test/transform_test.cc     | 767 +++++++++++++++++++--------------
 src/iceberg/test/truncate_util_test.cc |  53 +++
 src/iceberg/transform_function.cc      | 231 +---------
 src/iceberg/transform_function.h       |   3 +
 src/iceberg/type_fwd.h                 |   3 +
 src/iceberg/util/bucket_util.cc        | 147 +++++++
 src/iceberg/util/bucket_util.h         |  60 +++
 src/iceberg/util/conversions.cc        |  17 +-
 src/iceberg/util/decimal.cc            | 110 ++++-
 src/iceberg/util/decimal.h             |  13 +-
 src/iceberg/util/temporal_util.cc      | 239 ++++++++++
 src/iceberg/util/temporal_util.h       |  43 ++
 src/iceberg/util/truncate_util.cc      | 107 +++++
 src/iceberg/util/truncate_util.h       |  21 +-
 24 files changed, 1548 insertions(+), 557 deletions(-)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index e370950..a13f095 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -59,11 +59,14 @@ set(ICEBERG_SOURCES
     transform.cc
     transform_function.cc
     type.cc
+    util/bucket_util.cc
     util/conversions.cc
     util/decimal.cc
     util/gzip_internal.cc
     util/murmurhash3_internal.cc
+    util/temporal_util.cc
     util/timepoint.cc
+    util/truncate_util.cc
     util/uuid.cc
     v1_metadata.cc
     v2_metadata.cc
diff --git a/src/iceberg/expression/literal.cc 
b/src/iceberg/expression/literal.cc
index 18a46c6..aea719c 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -24,9 +24,9 @@
 #include <cstdint>
 #include <string>
 
-#include "iceberg/type_fwd.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/conversions.h"
+#include "iceberg/util/macros.h"
 
 namespace iceberg {
 
@@ -188,11 +188,14 @@ Result<Literal> LiteralCaster::CastFromString(
   const auto& str_val = std::get<std::string>(literal.value_);
 
   switch (target_type->type_id()) {
+    case TypeId::kUuid: {
+      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
+      return Literal::UUID(uuid);
+    }
     case TypeId::kDate:
     case TypeId::kTime:
     case TypeId::kTimestamp:
     case TypeId::kTimestampTz:
-    case TypeId::kUuid:
       return NotImplemented("Cast from String to {} is not implemented yet",
                             target_type->ToString());
     default:
@@ -296,6 +299,10 @@ Literal Literal::Fixed(std::vector<uint8_t> value) {
   return {Value{std::move(value)}, fixed(size)};
 }
 
+Literal Literal::Decimal(int128_t value, int32_t precision, int32_t scale) {
+  return {Value{::iceberg::Decimal(value)}, decimal(precision, scale)};
+}
+
 Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
                                      std::shared_ptr<PrimitiveType> type) {
   return Conversions::FromBytes(std::move(type), data);
@@ -385,6 +392,15 @@ std::partial_ordering Literal::operator<=>(const Literal& 
other) const {
       return CompareFloat(this_val, other_val);
     }
 
+    case TypeId::kDecimal: {
+      auto& this_val = std::get<::iceberg::Decimal>(value_);
+      auto& other_val = std::get<::iceberg::Decimal>(other.value_);
+      const auto& this_decimal_type = 
internal::checked_cast<DecimalType&>(*type_);
+      const auto& other_decimal_type = 
internal::checked_cast<DecimalType&>(*other.type_);
+      return ::iceberg::Decimal::Compare(this_val, other_val, 
this_decimal_type.scale(),
+                                         other_decimal_type.scale());
+    }
+
     case TypeId::kString: {
       auto& this_val = std::get<std::string>(value_);
       auto& other_val = std::get<std::string>(other.value_);
@@ -440,6 +456,12 @@ std::string Literal::ToString() const {
     case TypeId::kDouble: {
       return std::to_string(std::get<double>(value_));
     }
+    case TypeId::kDecimal: {
+      const auto& decimal_type = internal::checked_cast<DecimalType&>(*type_);
+      const auto& decimal = std::get<::iceberg::Decimal>(value_);
+      return decimal.ToString(decimal_type.scale())
+          .value_or("invalid literal of type decimal");
+    }
     case TypeId::kString: {
       return "\"" + std::get<std::string>(value_) + "\"";
     }
diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h
index 70ff2d8..13ffafe 100644
--- a/src/iceberg/expression/literal.h
+++ b/src/iceberg/expression/literal.h
@@ -27,7 +27,9 @@
 
 #include "iceberg/result.h"
 #include "iceberg/type.h"
+#include "iceberg/util/decimal.h"
 #include "iceberg/util/formattable.h"
+#include "iceberg/util/int128.h"
 #include "iceberg/util/uuid.h"
 
 namespace iceberg {
@@ -57,9 +59,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
                              float,           // for float
                              double,          // for double
                              std::string,     // for string
-                             Uuid,            // for uuid
-                             std::vector<uint8_t>,     // for binary, fixed
-                             std::array<uint8_t, 16>,  // for decimal
+                             std::vector<uint8_t>,  // for binary, fixed
+                             ::iceberg::Decimal,    // for decimal
+                             Uuid,                  // for uuid
                              BelowMin, AboveMax>;
 
   /// \brief Factory methods for primitive types
@@ -77,6 +79,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
   static Literal Binary(std::vector<uint8_t> value);
   static Literal Fixed(std::vector<uint8_t> value);
 
+  /// \brief Create a decimal literal whose type is decimal(precision, scale).
+  /// \param value The unscaled 128-bit integer value, e.g. 123456 at scale 2 is 1234.56.
+  static Literal Decimal(int128_t value, int32_t precision, int32_t scale);
+
   /// \brief Create a literal representing a null value.
   static Literal Null(std::shared_ptr<PrimitiveType> type) {
     return {Value{std::monostate{}}, std::move(type)};
@@ -205,6 +211,11 @@ struct LiteralTraits<TypeId::kDouble> {
   using ValueType = double;
 };
 
+template <>
+struct LiteralTraits<TypeId::kDecimal> {
+  using ValueType = Decimal;
+};
+
 template <>
 struct LiteralTraits<TypeId::kString> {
   using ValueType = std::string;
diff --git a/src/iceberg/manifest_adapter.cc b/src/iceberg/manifest_adapter.cc
index bc0f834..c2ac30e 100644
--- a/src/iceberg/manifest_adapter.cc
+++ b/src/iceberg/manifest_adapter.cc
@@ -220,9 +220,12 @@ Status ManifestEntryAdapter::AppendPartitionValues(
         break;
       case TypeId::kDecimal:
         ICEBERG_RETURN_UNEXPECTED(AppendField(
-            child_array, std::get<std::array<uint8_t, 
16>>(partition_value.value())));
+            child_array, 
std::get<Decimal>(partition_value.value()).ToBytes()));
         break;
       case TypeId::kUuid:
+        ICEBERG_RETURN_UNEXPECTED(
+            AppendField(child_array, 
std::get<Uuid>(partition_value.value()).bytes()));
+        break;
       case TypeId::kStruct:
       case TypeId::kList:
       case TypeId::kMap:
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 25bfdc6..1b24f85 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -81,11 +81,14 @@ iceberg_sources = files(
     'transform.cc',
     'transform_function.cc',
     'type.cc',
+    'util/bucket_util.cc',
     'util/conversions.cc',
     'util/decimal.cc',
     'util/gzip_internal.cc',
     'util/murmurhash3_internal.cc',
+    'util/temporal_util.cc',
     'util/timepoint.cc',
+    'util/truncate_util.cc',
     'util/uuid.cc',
     'v1_metadata.cc',
     'v2_metadata.cc',
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 7c62a2a..68af62b 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -99,11 +99,13 @@ add_iceberg_test(json_serde_test
 
 add_iceberg_test(util_test
                  SOURCES
+                 bucket_util_test.cc
                  config_test.cc
                  decimal_test.cc
                  endian_test.cc
                  formatter_test.cc
                  string_util_test.cc
+                 truncate_util_test.cc
                  uuid_test.cc
                  visit_type_test.cc)
 
diff --git a/src/iceberg/test/bucket_util_test.cc 
b/src/iceberg/test/bucket_util_test.cc
new file mode 100644
index 0000000..69a04ef
--- /dev/null
+++ b/src/iceberg/test/bucket_util_test.cc
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/bucket_util.h"
+
+#include <chrono>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/util/decimal.h"
+#include "iceberg/util/uuid.h"
+
+namespace iceberg {
+
+// Expected values are the reference hashes listed in the Iceberg spec:
+// https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements
+TEST(BucketUtilsTest, HashHelper) {
+  // int and long: 34 must produce the same hash through HashInt and HashLong
+  EXPECT_EQ(BucketUtils::HashInt(34), 2017239379);
+  EXPECT_EQ(BucketUtils::HashLong(34L), 2017239379);
+
+  // decimal: 14.20 is hashed via the bytes returned by ToBigEndian()
+  auto decimal = Decimal::FromString("14.20");
+  ASSERT_TRUE(decimal.has_value());
+  EXPECT_EQ(BucketUtils::HashBytes(decimal->ToBigEndian()), -500754589);
+
+  // date: 2017-11-16 expressed as days since 1970-01-01
+  std::chrono::sys_days sd = std::chrono::year{2017} / 11 / 16;
+  std::chrono::sys_days epoch{std::chrono::year{1970} / 1 / 1};
+  int32_t days = (sd - epoch).count();
+  EXPECT_EQ(BucketUtils::HashInt(days), -653330422);
+
+  // time
+  // 22:31:08 expressed as a microsecond count
+  int64_t time_micros = (22 * 3600 + 31 * 60 + 8) * 1000000LL;
+  EXPECT_EQ(BucketUtils::HashLong(time_micros), -662762989);
+
+  // timestamp
+  // 2017-11-16T22:31:08 as microseconds since the epoch
+  std::chrono::system_clock::time_point tp =
+      std::chrono::sys_days{std::chrono::year{2017} / 11 / 16} + 
std::chrono::hours{22} +
+      std::chrono::minutes{31} + std::chrono::seconds{8};
+  int64_t timestamp_micros =
+      
std::chrono::duration_cast<std::chrono::microseconds>(tp.time_since_epoch())
+          .count();
+  EXPECT_EQ(BucketUtils::HashLong(timestamp_micros), -2047944441);
+  // one microsecond later: 2017-11-16T22:31:08.000001
+  EXPECT_EQ(BucketUtils::HashLong(timestamp_micros + 1), -1207196810);
+
+  // string: hash the raw bytes of "iceberg"
+  std::string str = "iceberg";
+  EXPECT_EQ(BucketUtils::HashBytes(std::span<const uint8_t>(
+                reinterpret_cast<const uint8_t*>(str.data()), str.size())),
+            1210000089);
+
+  // uuid (NOTE(review): result is dereferenced without a has_value() check, unlike decimal)
+  auto uuid = Uuid::FromString("f79c3e09-677c-4bbd-a479-3f349cb785e7");
+  EXPECT_EQ(BucketUtils::HashBytes(uuid->bytes()), 1488055340);
+
+  // fixed & binary: hash the bytes {0, 1, 2, 3}
+  std::vector<uint8_t> fixed = {0, 1, 2, 3};
+  EXPECT_EQ(BucketUtils::HashBytes(fixed), -188683207);
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/test/decimal_test.cc b/src/iceberg/test/decimal_test.cc
index 6850d7a..71ba674 100644
--- a/src/iceberg/test/decimal_test.cc
+++ b/src/iceberg/test/decimal_test.cc
@@ -490,6 +490,50 @@ TEST(DecimalTest, FromBigEndianInvalid) {
               IsError(ErrorKind::kInvalidArgument));
 }
 
+TEST(DecimalTest, ToBigEndian) {
+  std::vector<int64_t> high_values = {0,
+                                      1,
+                                      -1,
+                                      INT32_MAX,
+                                      INT32_MIN,
+                                      static_cast<int64_t>(INT32_MAX) + 1,
+                                      static_cast<int64_t>(INT32_MIN) - 1,
+                                      INT64_MAX,
+                                      INT64_MIN};
+  std::vector<uint64_t> low_values = {0,
+                                      1,
+                                      255,
+                                      UINT32_MAX,
+                                      static_cast<uint64_t>(UINT32_MAX) + 1,
+                                      static_cast<uint64_t>(UINT32_MAX) + 2,
+                                      static_cast<uint64_t>(UINT32_MAX) + 3,
+                                      static_cast<uint64_t>(UINT32_MAX) + 4,
+                                      static_cast<uint64_t>(UINT32_MAX) + 5,
+                                      static_cast<uint64_t>(UINT32_MAX) + 6,
+                                      static_cast<uint64_t>(UINT32_MAX) + 7,
+                                      static_cast<uint64_t>(UINT32_MAX) + 8,
+                                      UINT64_MAX};
+
+  for (int64_t high : high_values) {
+    for (uint64_t low : low_values) {
+      Decimal decimal(high, low);
+      auto bytes = decimal.ToBigEndian();
+      auto result = Decimal::FromBigEndian(bytes.data(), bytes.size());
+      ASSERT_THAT(result, IsOk());
+      EXPECT_EQ(result.value(), decimal);
+    }
+  }
+
+  for (int128_t value : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, -255, 
-1, 0, 1, 255,
+                                              256, INT32_MAX, INT64_MAX}) {
+    Decimal decimal(value);
+    auto bytes = decimal.ToBigEndian();
+    auto result = Decimal::FromBigEndian(bytes.data(), bytes.size());
+    ASSERT_THAT(result, IsOk());
+    EXPECT_EQ(result.value(), decimal);
+  }
+}
+
 TEST(DecimalTestFunctionality, Multiply) {
   ASSERT_EQ(Decimal(60501), Decimal(301) * Decimal(201));
   ASSERT_EQ(Decimal(-60501), Decimal(-301) * Decimal(201));
@@ -671,4 +715,58 @@ TEST(DecimalTest, Rescale) {
   ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kInvalid));
 }
 
+TEST(DecimalTest, Compare) {
+  // max positive unscaled value
+  // 10^38 - 1: scaling causes overflow
+  ASSERT_EQ(Decimal::Compare(Decimal("99999999999999999999999999999999999999"),
+                             
Decimal("99999999999999999999999999999999999999"), 2, 3),
+            std::partial_ordering::greater);
+  // 10^37 - 1: scaling does not overflow
+  ASSERT_EQ(Decimal::Compare(Decimal("9999999999999999999999999999999999999"),
+                             
Decimal("99999999999999999999999999999999999999"), 2, 3),
+            std::partial_ordering::less);
+
+  // min negative unscaled value
+  // -10^38 + 1: scaling causes overflow
+  
ASSERT_EQ(Decimal::Compare(Decimal("-99999999999999999999999999999999999999"),
+                             
Decimal("-99999999999999999999999999999999999999"), 2, 3),
+            std::partial_ordering::less);
+  // -10^37 + 1: scaling does not overflow
+  ASSERT_EQ(Decimal::Compare(Decimal("-9999999999999999999999999999999999999"),
+                             
Decimal("-99999999999999999999999999999999999999"), 2, 3),
+            std::partial_ordering::greater);
+
+  // equal values with different scales
+  ASSERT_EQ(Decimal::Compare(Decimal("123456789"), Decimal("1234567890"), 2, 
3),
+            std::partial_ordering::equivalent);
+  ASSERT_EQ(Decimal::Compare(Decimal("-1234567890"), Decimal("-123456789"), 3, 
2),
+            std::partial_ordering::equivalent);
+
+  // different values with different scales
+  ASSERT_EQ(Decimal::Compare(Decimal("123456788"), Decimal("1234567890"), 2, 
3),
+            std::partial_ordering::less);
+  ASSERT_EQ(Decimal::Compare(Decimal("-1234567890"), Decimal("-123456788"), 2, 
3),
+            std::partial_ordering::less);
+
+  // different values with the same scale
+  ASSERT_EQ(Decimal::Compare(Decimal("123456790"), Decimal("123456789"), 2, 2),
+            std::partial_ordering::greater);
+  ASSERT_EQ(Decimal::Compare(Decimal("-123456790"), Decimal("-123456789"), 2, 
2),
+            std::partial_ordering::less);
+
+  // different signs
+  ASSERT_EQ(Decimal::Compare(Decimal("123456789"), Decimal("-123456789"), 2, 
3),
+            std::partial_ordering::greater);
+  ASSERT_EQ(Decimal::Compare(Decimal("-123456789"), Decimal("123456789"), 2, 
3),
+            std::partial_ordering::less);
+
+  // zero comparisons
+  ASSERT_EQ(Decimal::Compare(Decimal("0"), Decimal("0"), 2, 3),
+            std::partial_ordering::equivalent);
+  ASSERT_EQ(Decimal::Compare(Decimal("0"), Decimal("123456789"), 2, 3),
+            std::partial_ordering::less);
+  ASSERT_EQ(Decimal::Compare(Decimal("-123456789"), Decimal("0"), 2, 3),
+            std::partial_ordering::less);
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc
index 6e4b2aa..0dd291d 100644
--- a/src/iceberg/test/literal_test.cc
+++ b/src/iceberg/test/literal_test.cc
@@ -256,6 +256,20 @@ TEST(LiteralTest, DoubleZeroComparison) {
   EXPECT_EQ(neg_zero <=> pos_zero, std::partial_ordering::less);
 }
 
+TEST(LiteralTest, UuidComparison) {  // equal UUIDs are equivalent; distinct ones unordered
+  auto uuid1 = 
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value();
+  auto uuid2 = 
Uuid::FromString("123e4567-e89b-12d3-a456-426614174001").value();
+  auto uuid3 = 
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value();
+
+  auto literal1 = Literal::UUID(uuid1);
+  auto literal2 = Literal::UUID(uuid2);
+  auto literal3 = Literal::UUID(uuid3);
+
+  EXPECT_EQ(literal1 <=> literal3, std::partial_ordering::equivalent);
+  EXPECT_EQ(literal1 <=> literal2, std::partial_ordering::unordered);
+  EXPECT_EQ(literal2 <=> literal1, std::partial_ordering::unordered);
+}
+
 // Parameter struct for literal serialization and deserialization tests
 struct LiteralParam {
   std::string test_name;
@@ -346,6 +360,17 @@ INSTANTIATE_TEST_SUITE_P(
                      Literal::Double(std::numeric_limits<double>::lowest()),
                      float64()},
 
+        // Decimal type
+        LiteralParam{"DecimalPositive",
+                     {1, 226, 64},
+                     Literal::Decimal(123456, 6, 2),
+                     decimal(6, 2)},
+        LiteralParam{"DecimalNegative",
+                     {254, 29, 192},
+                     Literal::Decimal(-123456, 6, 2),
+                     decimal(6, 2)},
+        LiteralParam{"DecimalZero", {0}, Literal::Decimal(0, 3, 0), decimal(3, 
0)},
+
         LiteralParam{"String",
                      {105, 99, 101, 98, 101, 114, 103},
                      Literal::String("iceberg"),
@@ -506,10 +531,28 @@ INSTANTIATE_TEST_SUITE_P(
                               .literal = Literal::Double(std::numbers::pi),
                               .expected_type_id = TypeId::kDouble,
                               .expected_string = "3.141593"},
+        BasicLiteralTestParam{.test_name = "DecimalPositive",
+                              .literal = Literal::Decimal(123456, 6, 2),
+                              .expected_type_id = TypeId::kDecimal,
+                              .expected_string = "1234.56"},
+        BasicLiteralTestParam{.test_name = "DecimalNegative",
+                              .literal = Literal::Decimal(-123456, 6, 2),
+                              .expected_type_id = TypeId::kDecimal,
+                              .expected_string = "-1234.56"},
+        BasicLiteralTestParam{.test_name = "DecimalZero",
+                              .literal = Literal::Decimal(0, 3, 0),
+                              .expected_type_id = TypeId::kDecimal,
+                              .expected_string = "0"},
         BasicLiteralTestParam{.test_name = "String",
                               .literal = Literal::String("hello world"),
                               .expected_type_id = TypeId::kString,
                               .expected_string = "\"hello world\""},
+        BasicLiteralTestParam{
+            .test_name = "Uuid",
+            .literal = Literal::UUID(
+                
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
+            .expected_type_id = TypeId::kUuid,
+            .expected_string = "123e4567-e89b-12d3-a456-426614174000"},
         BasicLiteralTestParam{
             .test_name = "Binary",
             .literal = Literal::Binary(std::vector<uint8_t>{0x01, 0x02, 0x03, 
0xFF}),
@@ -563,6 +606,10 @@ INSTANTIATE_TEST_SUITE_P(
                                    .small_literal = Literal::Double(1.5),
                                    .large_literal = Literal::Double(2.5),
                                    .equal_literal = Literal::Double(1.5)},
+        ComparisonLiteralTestParam{.test_name = "Decimal",
+                                   .small_literal = Literal::Decimal(123456, 
6, 2),
+                                   .large_literal = Literal::Decimal(234567, 
6, 2),
+                                   .equal_literal = Literal::Decimal(123456, 
6, 2)},
         ComparisonLiteralTestParam{.test_name = "String",
                                    .small_literal = Literal::String("apple"),
                                    .large_literal = Literal::String("banana"),
@@ -672,6 +719,13 @@ INSTANTIATE_TEST_SUITE_P(
                              .target_type = fixed(4),
                              .expected_literal = 
Literal::Fixed(std::vector<uint8_t>{
                                  0x01, 0x02, 0x03, 0x04})},
+        // String cast tests
+        CastLiteralTestParam{
+            .test_name = "StringToUuid",
+            .source_literal = 
Literal::String("123e4567-e89b-12d3-a456-426614174000"),
+            .target_type = uuid(),
+            .expected_literal = Literal::UUID(
+                
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
         // Same type cast test
         CastLiteralTestParam{.test_name = "IntToInt",
                              .source_literal = Literal::Int(42),
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index dd3bd05..88b1632 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -68,11 +68,13 @@ iceberg_tests = {
     },
     'util_test': {
         'sources': files(
+            'bucket_util_test.cc',
             'config_test.cc',
             'decimal_test.cc',
             'endian_test.cc',
             'formatter_test.cc',
             'string_util_test.cc',
+            'truncate_util_test.cc',
             'uuid_test.cc',
             'visit_type_test.cc',
         ),
diff --git a/src/iceberg/test/transform_test.cc 
b/src/iceberg/test/transform_test.cc
index c1efcb5..1003b95 100644
--- a/src/iceberg/test/transform_test.cc
+++ b/src/iceberg/test/transform_test.cc
@@ -21,11 +21,13 @@
 
 #include <format>
 #include <memory>
+#include <string>
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
 #include "iceberg/expression/literal.h"
+#include "iceberg/transform_function.h"
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
 #include "matchers.h"
@@ -63,6 +65,7 @@ TEST(TransformFunctionTest, CreateTruncateTransform) {
   auto transformPtr = transform->Bind(iceberg::string());
   EXPECT_EQ(transformPtr.value()->transform_type(), TransformType::kTruncate);
 }
+
 TEST(TransformFromStringTest, PositiveCases) {
   struct Case {
     std::string str;
@@ -187,373 +190,485 @@ TEST(TransformResultTypeTest, NegativeCases) {
   }
 }
 
-TEST(TransformLiteralTest, IdentityTransform) {
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
+// Parameter bundle shared by the value-parameterized transform tests.
+struct TransformParam {
+  std::string str;  // Case name; returned by the test-name generator lambda.
+  // The integer parameter associated with the transform.
+  int32_t param;
+  std::shared_ptr<Type> source_type;  // Type the transform is bound to.
+  Literal source;                     // Literal passed to Transform().
+  Literal expected;                   // Literal the result is compared against.
+};
 
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::boolean(),
-       .source = Literal::Boolean(true),
-       .expected = Literal::Boolean(true)},
-      {.source_type = iceberg::int32(),
-       .source = Literal::Int(42),
-       .expected = Literal::Int(42)},
-      {.source_type = iceberg::int32(),
-       .source = Literal::Date(30000),
-       .expected = Literal::Date(30000)},
-      {.source_type = iceberg::int64(),
-       .source = Literal::Long(1234567890),
-       .expected = Literal::Long(1234567890)},
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Timestamp(1622547800000000)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::TimestampTz(1622547800000000)},
-      {.source_type = iceberg::float32(),
-       .source = Literal::Float(3.14),
-       .expected = Literal::Float(3.14)},
-      {.source_type = iceberg::float64(),
-       .source = Literal::Double(1.23e-5),
-       .expected = Literal::Double(1.23e-5)},
-      {.source_type = iceberg::string(),
-       .source = Literal::String("Hello, World!"),
-       .expected = Literal::String("Hello, World!")},
-      {.source_type = iceberg::binary(),
-       .source = Literal::Binary({0x01, 0x02, 0x03}),
-       .expected = Literal::Binary({0x01, 0x02, 0x03})},
-  };
+class TransformLiteralTest : public ::testing::TestWithParam<TransformParam> 
{};
 
-  for (const auto& c : cases) {
-    auto transform = Transform::Identity();
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind identity 
transform";
+TEST_P(TransformLiteralTest, IdentityTransform) {
+  const auto& param = GetParam();
 
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
+  auto transform = Transform::Identity();
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind identity transform";
 
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
+
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, BucketTransform) {
+INSTANTIATE_TEST_SUITE_P(  // identity cases: every `expected` equals its `source`
+    IdentityTransformTests, TransformLiteralTest,
+    ::testing::Values(
+        TransformParam{.str = "BooleanTrue",
+                       .source_type = iceberg::boolean(),
+                       .source = Literal::Boolean(true),
+                       .expected = Literal::Boolean(true)},
+        TransformParam{.str = "BooleanFalse",
+                       .source_type = iceberg::boolean(),
+                       .source = Literal::Boolean(false),
+                       .expected = Literal::Boolean(false)},
+        TransformParam{.str = "Int32",
+                       .source_type = iceberg::int32(),
+                       .source = Literal::Int(42),
+                       .expected = Literal::Int(42)},
+        TransformParam{.str = "Date",
+                       .source_type = iceberg::int32(),  // NOTE(review): Date literal bound to int32 — confirm intended
+                       .source = Literal::Date(30000),
+                       .expected = Literal::Date(30000)},
+        TransformParam{.str = "Int64",
+                       .source_type = iceberg::int64(),
+                       .source = Literal::Long(1234567890),
+                       .expected = Literal::Long(1234567890)},
+        TransformParam{.str = "Timestamp",
+                       .source_type = iceberg::timestamp(),
+                       .source = Literal::Timestamp(1622547800000000),
+                       .expected = Literal::Timestamp(1622547800000000)},
+        TransformParam{.str = "TimestampTz",
+                       .source_type = iceberg::timestamp_tz(),
+                       .source = Literal::TimestampTz(1622547800000000),
+                       .expected = Literal::TimestampTz(1622547800000000)},
+        TransformParam{.str = "Float",
+                       .source_type = iceberg::float32(),
+                       .source = Literal::Float(3.14),
+                       .expected = Literal::Float(3.14)},
+        TransformParam{.str = "Double",
+                       .source_type = iceberg::float64(),
+                       .source = Literal::Double(1.23e-5),
+                       .expected = Literal::Double(1.23e-5)},
+        TransformParam{.str = "Decimal",
+                       .source_type = iceberg::decimal(10, 2),
+                       .source = Literal::Decimal(123456, 10, 2),
+                       .expected = Literal::Decimal(123456, 10, 2)},
+        TransformParam{.str = "String",
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("Hello, World!"),
+                       .expected = Literal::String("Hello, World!")},
+        TransformParam{
+            .str = "Uuid",
+            .source_type = iceberg::uuid(),
+            .source = Literal::UUID(
+                
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
+            .expected = Literal::UUID(
+                
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
+        TransformParam{.str = "Binary",
+                       .source_type = iceberg::binary(),
+                       .source = Literal::Binary({0x01, 0x02, 0x03}),
+                       .expected = Literal::Binary({0x01, 0x02, 0x03})},
+        TransformParam{.str = "Fixed",
+                       .source_type = iceberg::fixed(3),
+                       .source = Literal::Fixed({0x01, 0x02, 0x03}),
+                       .expected = Literal::Fixed({0x01, 0x02, 0x03})}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return 
info.param.str; });
+
+class BucketTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(BucketTransformTest, BucketTransform) {
   constexpr int32_t num_buckets = 4;
   auto transform = Transform::Bucket(num_buckets);
 
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
+  const auto& param = GetParam();
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind bucket transform";
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
 
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::int32(),
-       .source = Literal::Int(42),
-       .expected = Literal::Int(3)},
-      {.source_type = iceberg::date(),
-       .source = Literal::Date(30000),
-       .expected = Literal::Int(2)},
-      {.source_type = iceberg::int64(),
-       .source = Literal::Long(1234567890),
-       .expected = Literal::Int(3)},
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Int(1)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::Int(1)},
-      {.source_type = iceberg::string(),
-       .source = Literal::String("test"),
-       .expected = Literal::Int(3)},
-  };
-
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind bucket transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
-
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, TruncateTransform) {
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    int32_t width;
-    Literal source;
-    Literal expected;
-  };
-
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::int32(),
-       .width = 5,
-       .source = Literal::Int(123456),
-       .expected = Literal::Int(123455)},
-      {.source_type = iceberg::string(),
-       .width = 5,
-       .source = Literal::String("Hello, World!"),
-       .expected = Literal::String("Hello")},
-      {.source_type = iceberg::string(),
-       .width = 5,
-       .source = Literal::String("😜🧐🤔🤪🥳😵‍💫😂"),
-       // Truncate to 5 utf-8 code points
-       .expected = Literal::String("😜🧐🤔🤪🥳")},
-      {.source_type = iceberg::string(),
-       .width = 8,
-       .source = Literal::String("a😜b🧐c🤔d🤪e🥳"),
-       .expected = Literal::String("a😜b🧐c🤔d🤪")},
-      {.source_type = iceberg::binary(),
-       .width = 5,
-       .source = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05, 0x06}),
-       .expected = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05})},
-  };
-
-  for (const auto& c : cases) {
-    auto transform = Transform::Truncate(c.width);
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind truncate transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
-
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+INSTANTIATE_TEST_SUITE_P(
+    BucketTransformTests, BucketTransformTest,
+    ::testing::Values(
+        TransformParam{.str = "Int32",
+                       .source_type = iceberg::int32(),
+                       .source = Literal::Int(34),
+                       .expected = Literal::Int(3)},
+        TransformParam{.str = "Int64",
+                       .source_type = iceberg::int64(),
+                       .source = Literal::Long(34),
+                       .expected = Literal::Int(3)},
+        TransformParam{.str = "Decimal",
+                       // 14.20
+                       .source_type = iceberg::decimal(4, 2),
+                       .source = Literal::Decimal(1420, 4, 2),
+                       .expected = Literal::Int(3)},
+        TransformParam{.str = "Date",
+                       // 2017-11-16
+                       .source_type = iceberg::date(),
+                       .source = Literal::Date(17486),
+                       .expected = Literal::Int(2)},
+        TransformParam{.str = "Time",
+                       // 22:31:08 in microseconds
+                       .source_type = iceberg::time(),
+                       .source = Literal::Time(81068000000),
+                       .expected = Literal::Int(3)},
+        TransformParam{.str = "Timestamp",
+                       // 2017-11-16T22:31:08 in microseconds
+                       .source_type = iceberg::timestamp(),
+                       .source = Literal::Timestamp(1510871468000000),
+                       .expected = Literal::Int(3)},
+        TransformParam{.str = "TimestampTz",
+                       // 2017-11-16T22:31:08.000001 in microseconds
+                       .source_type = iceberg::timestamp_tz(),
+                       .source = Literal::TimestampTz(1510871468000001),
+                       .expected = Literal::Int(2)},
+        TransformParam{.str = "String",
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("iceberg"),
+                       .expected = Literal::Int(1)},
+        TransformParam{
+            .str = "Uuid",
+            .source_type = iceberg::uuid(),
+            .source = Literal::UUID(
+                Uuid::FromString("f79c3e09-677c-4bbd-a479-3f349cb785e7").value()),
+            .expected = Literal::Int(0)},
+        TransformParam{.str = "Fixed",
+                       .source_type = iceberg::fixed(4),
+                       .source = Literal::Fixed({0, 1, 2, 3}),
+                       .expected = Literal::Int(1)},
+        TransformParam{.str = "Binary",
+                       .source_type = iceberg::binary(),
+                       .source = Literal::Binary({0, 1, 2, 3}),
+                       .expected = Literal::Int(1)}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class TruncateTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(TruncateTransformTest, TruncateTransform) {
+  const auto& param = GetParam();
+  auto transform = Transform::Truncate(param.param);
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind truncate transform";
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
+
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, YearTransform) {
+INSTANTIATE_TEST_SUITE_P(
+    TruncateTransformTests, TruncateTransformTest,
+    ::testing::Values(
+        TransformParam{.str = "Int32",
+                       .param = 5,
+                       .source_type = iceberg::int32(),
+                       .source = Literal::Int(123456),
+                       .expected = Literal::Int(123455)},
+        TransformParam{.str = "Int64",
+                       .param = 10,
+                       .source_type = iceberg::int64(),
+                       .source = Literal::Long(-1),
+                       .expected = Literal::Long(-10)},
+        TransformParam{.str = "Decimal",
+                       .param = 50,
+                       .source_type = iceberg::decimal(5, 2),
+                       .source = Literal::Decimal(12345, 5, 2),
+                       .expected = Literal::Decimal(12300, 5, 2)},
+        TransformParam{.str = "StringShort",
+                       .param = 5,
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("Hello, World!"),
+                       .expected = Literal::String("Hello")},
+        TransformParam{.str = "StringEmoji",
+                       .param = 5,
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("😜🧐🤔🤪🥳😵‍💫😂"),
+                       .expected = Literal::String("😜🧐🤔🤪🥳")},
+        TransformParam{.str = "StringMixed",
+                       .param = 8,
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("a😜b🧐c🤔d🤪e🥳"),
+                       .expected = Literal::String("a😜b🧐c🤔d🤪")},
+        TransformParam{.str = "Binary",
+                       .param = 5,
+                       .source_type = iceberg::binary(),
+                       .source = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05, 0x06}),
+                       .expected = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05})}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class YearTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(YearTransformTest, YearTransform) {
   auto transform = Transform::Year();
+  const auto& param = GetParam();
 
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind year transform";
 
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::timestamp(),
-       // 2021-06-01T11:43:20Z
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Int(2021)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::Int(2021)},
-      {.source_type = iceberg::date(),
-       .source = Literal::Date(30000),
-       .expected = Literal::Int(2052)},
-  };
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
 
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind year transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
-
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, MonthTransform) {
+INSTANTIATE_TEST_SUITE_P(
+    YearTransformTests, YearTransformTest,
+    ::testing::Values(TransformParam{.str = "Timestamp",
+                                     // 2021-06-01T11:43:20Z
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Timestamp(1622547800000000),
+                                     .expected = Literal::Int(2021)},
+                      TransformParam{.str = "TimestampTz",
+                                     .source_type = iceberg::timestamp_tz(),
+                                     .source = Literal::TimestampTz(1622547800000000),
+                                     .expected = Literal::Int(2021)},
+                      TransformParam{.str = "Date",
+                                     .source_type = iceberg::date(),
+                                     .source = Literal::Date(30000),
+                                     .expected = Literal::Int(2052)}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class MonthTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(MonthTransformTest, MonthTransform) {
   auto transform = Transform::Month();
+  const auto& param = GetParam();
 
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
-
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Int(617)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::Int(617)},
-      {.source_type = iceberg::date(),
-       .source = Literal::Date(30000),
-       .expected = Literal::Int(985)},
-  };
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind month transform";
 
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind month transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
 
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformFunctionTransformTest, DayTransform) {
+INSTANTIATE_TEST_SUITE_P(
+    MonthTransformTests, MonthTransformTest,
+    ::testing::Values(TransformParam{.str = "Timestamp",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Timestamp(1622547800000000),
+                                     .expected = Literal::Int(617)},
+                      TransformParam{.str = "TimestampTz",
+                                     .source_type = iceberg::timestamp_tz(),
+                                     .source = Literal::TimestampTz(1622547800000000),
+                                     .expected = Literal::Int(617)},
+                      TransformParam{.str = "Date",
+                                     .source_type = iceberg::date(),
+                                     .source = Literal::Date(30000),
+                                     .expected = Literal::Int(985)}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class DayTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(DayTransformTest, DayTransform) {
   auto transform = Transform::Day();
+  const auto& param = GetParam();
 
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind day transform";
 
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Int(18779)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::Int(18779)},
-      {.source_type = iceberg::date(),
-       .source = Literal::Date(30000),
-       .expected = Literal::Int(30000)},
-  };
-
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind day transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
 
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, HourTransform) {
+INSTANTIATE_TEST_SUITE_P(
+    DayTransformTests, DayTransformTest,
+    ::testing::Values(TransformParam{.str = "Timestamp",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Timestamp(1622547800000000),
+                                     .expected = Literal::Int(18779)},
+                      TransformParam{.str = "TimestampTz",
+                                     .source_type = iceberg::timestamp_tz(),
+                                     .source = Literal::TimestampTz(1622547800000000),
+                                     .expected = Literal::Int(18779)},
+                      TransformParam{.str = "Date",
+                                     .source_type = iceberg::date(),
+                                     .source = Literal::Date(30000),
+                                     .expected = Literal::Int(30000)}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class HourTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(HourTransformTest, HourTransform) {
   auto transform = Transform::Hour();
+  const auto& param = GetParam();
 
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    Literal expected;
-  };
-
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000),
-       .expected = Literal::Int(450707)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000),
-       .expected = Literal::Int(450707)},
-  };
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind hour transform";
 
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind hour transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    ASSERT_TRUE(result.has_value())
-        << "Failed to transform literal: " << c.source.ToString();
+  auto result = transformPtr.value()->Transform(param.source);
+  ASSERT_TRUE(result.has_value())
+      << "Failed to transform literal: " << param.source.ToString();
 
-    EXPECT_EQ(result.value(), c.expected)
-        << "Unexpected result for source: " << c.source.ToString();
-  }
+  EXPECT_EQ(result.value(), param.expected)
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, VoidTransform) {
+INSTANTIATE_TEST_SUITE_P(
+    HourTransformTests, HourTransformTest,
+    ::testing::Values(TransformParam{.str = "Timestamp",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Timestamp(1622547800000000),
+                                     .expected = Literal::Int(450707)},
+                      TransformParam{.str = "TimestampTz",
+                                     .source_type = iceberg::timestamp_tz(),
+                                     .source = Literal::TimestampTz(1622547800000000),
+                                     .expected = Literal::Int(450707)}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class VoidTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(VoidTransformTest, VoidTransform) {
   auto transform = Transform::Void();
-
-  struct Case {
-    std::shared_ptr<Type> source_type;
-    Literal source;
-  };
-
-  const std::vector<Case> cases = {
-      {.source_type = iceberg::boolean(), .source = Literal::Boolean(true)},
-      {.source_type = iceberg::int32(), .source = Literal::Int(42)},
-      {.source_type = iceberg::date(), .source = Literal::Date(30000)},
-      {.source_type = iceberg::int64(), .source = Literal::Long(1234567890)},
-      {.source_type = iceberg::timestamp(),
-       .source = Literal::Timestamp(1622547800000000)},
-      {.source_type = iceberg::timestamp_tz(),
-       .source = Literal::TimestampTz(1622547800000000)},
-      {.source_type = iceberg::float32(), .source = Literal::Float(3.14)},
-      {.source_type = iceberg::float64(), .source = Literal::Double(1.23e-5)},
-      {.source_type = iceberg::string(), .source = Literal::String("Hello, World!")},
-      {.source_type = iceberg::binary(), .source = Literal::Binary({0x01, 0x02, 0x03})},
-  };
-
-  for (const auto& c : cases) {
-    auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind void transform";
-    auto result = transformPtr.value()->Transform(c.source);
-    EXPECT_TRUE(result->IsNull())
-        << "Expected void transform to return null type for source: "
-        << c.source.ToString();
-    EXPECT_EQ(result->type()->type_id(), c.source_type->type_id())
-        << "Expected void transform to return same type as source for: "
-        << c.source.ToString();
-  }
+  const auto& param = GetParam();
+
+  auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind void transform";
+
+  auto result = transformPtr.value()->Transform(param.source);
+  EXPECT_TRUE(result->IsNull())
+      << "Expected void transform to return null type for source: "
+      << param.source.ToString();
+  EXPECT_EQ(result->type()->type_id(), param.source_type->type_id())
+      << "Expected void transform to return same type as source for: "
+      << param.source.ToString();
+  EXPECT_EQ(result->ToString(), param.expected.ToString())
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
-TEST(TransformLiteralTest, NullLiteral) {
-  struct Case {
-    std::string str;
-    std::shared_ptr<Type> source_type;
-    Literal source;
-    std::shared_ptr<Type> expected_result_type;
-  };
-
-  const std::vector<Case> cases = {
-      {.str = "identity",
-       .source_type = iceberg::string(),
-       .source = Literal::Null(iceberg::string()),
-       .expected_result_type = iceberg::string()},
-      {.str = "year",
-       .source_type = iceberg::timestamp(),
-       .source = Literal::Null(iceberg::timestamp()),
-       .expected_result_type = iceberg::int32()},
-      {.str = "month",
-       .source_type = iceberg::timestamp(),
-       .source = Literal::Null(iceberg::timestamp()),
-       .expected_result_type = iceberg::int32()},
-      {.str = "day",
-       .source_type = iceberg::timestamp(),
-       .source = Literal::Null(iceberg::timestamp()),
-       .expected_result_type = iceberg::int32()},
-      {.str = "hour",
-       .source_type = iceberg::timestamp(),
-       .source = Literal::Null(iceberg::timestamp()),
-       .expected_result_type = iceberg::int32()},
-      {.str = "void",
-       .source_type = iceberg::string(),
-       .source = Literal::Null(iceberg::string()),
-       .expected_result_type = iceberg::string()},
-      {.str = "bucket[16]",
-       .source_type = iceberg::string(),
-       .source = Literal::Null(iceberg::string()),
-       .expected_result_type = iceberg::int32()},
-      {.str = "truncate[32]",
-       .source_type = iceberg::string(),
-       .source = Literal::Null(iceberg::string()),
-       .expected_result_type = iceberg::string()},
-  };
-
-  for (const auto& c : cases) {
-    auto result = TransformFromString(c.str);
-    ASSERT_TRUE(result.has_value()) << "Failed to parse: " << c.str;
-
-    const auto& transform = result.value();
-    const auto transformPtr = transform->Bind(c.source_type);
-    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind: " << c.str;
-
-    auto transform_result = transformPtr.value()->Transform(c.source);
-    EXPECT_TRUE(transform_result->IsNull())
-        << "Expected void transform to return null type for source: "
-        << c.source.ToString();
-    EXPECT_EQ(transform_result->type()->type_id(), c.expected_result_type->type_id())
-        << "Expected void transform to return same type as source for: "
-        << c.source.ToString();
-  }
+INSTANTIATE_TEST_SUITE_P(
+    VoidTransformTests, VoidTransformTest,
+    ::testing::Values(
+        TransformParam{.str = "Boolean",
+                       .source_type = iceberg::boolean(),
+                       .source = Literal::Boolean(true),
+                       .expected = Literal::Null(iceberg::boolean())},
+        TransformParam{.str = "Int32",
+                       .source_type = iceberg::int32(),
+                       .source = Literal::Int(42),
+                       .expected = Literal::Null(iceberg::int32())},
+        TransformParam{.str = "Date",
+                       .source_type = iceberg::date(),
+                       .source = Literal::Date(30000),
+                       .expected = Literal::Null(iceberg::date())},
+        TransformParam{.str = "Int64",
+                       .source_type = iceberg::int64(),
+                       .source = Literal::Long(1234567890),
+                       .expected = Literal::Null(iceberg::int64())},
+        TransformParam{.str = "Timestamp",
+                       .source_type = iceberg::timestamp(),
+                       .source = Literal::Timestamp(1622547800000000),
+                       .expected = Literal::Null(iceberg::timestamp())},
+        TransformParam{.str = "TimestampTz",
+                       .source_type = iceberg::timestamp_tz(),
+                       .source = Literal::TimestampTz(1622547800000000),
+                       .expected = Literal::Null(iceberg::timestamp_tz())},
+        TransformParam{.str = "Float",
+                       .source_type = iceberg::float32(),
+                       .source = Literal::Float(3.14),
+                       .expected = Literal::Null(iceberg::float32())},
+        TransformParam{.str = "Double",
+                       .source_type = iceberg::float64(),
+                       .source = Literal::Double(1.23e-5),
+                       .expected = Literal::Null(iceberg::float64())},
+        TransformParam{.str = "Decimal",
+                       .source_type = iceberg::decimal(10, 2),
+                       .source = Literal::Decimal(123456, 10, 2),
+                       .expected = Literal::Null(iceberg::decimal(10, 2))},
+        TransformParam{.str = "String",
+                       .source_type = iceberg::string(),
+                       .source = Literal::String("Hello, World!"),
+                       .expected = Literal::Null(iceberg::string())},
+        TransformParam{
+            .str = "Uuid",
+            .source_type = iceberg::uuid(),
+            .source = Literal::UUID(
+                Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
+            .expected = Literal::Null(iceberg::uuid())},
+        TransformParam{.str = "Binary",
+                       .source_type = iceberg::binary(),
+                       .source = Literal::Binary({0x01, 0x02, 0x03}),
+                       .expected = Literal::Null(iceberg::binary())},
+        TransformParam{.str = "Fixed",
+                       .source_type = iceberg::fixed(3),
+                       .source = Literal::Fixed({0x01, 0x02, 0x03}),
+                       .expected = Literal::Null(iceberg::fixed(3))}),
+    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
+
+class NullLiteralTransformTest : public ::testing::TestWithParam<TransformParam> {};
+
+TEST_P(NullLiteralTransformTest, NullLiteralTransform) {
+  const auto& param = GetParam();
+
+  auto result = TransformFromString(param.str);
+  ASSERT_TRUE(result.has_value()) << "Failed to parse: " << param.str;
+
+  const auto& transform = result.value();
+  const auto transformPtr = transform->Bind(param.source_type);
+  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind: " << param.str;
+
+  auto transform_result = transformPtr.value()->Transform(param.source);
+  EXPECT_TRUE(transform_result->IsNull())
+      << "Expected transform to return null type for source: " << param.source.ToString();
+  EXPECT_EQ(transform_result->ToString(), param.expected.ToString())
+      << "Unexpected result for source: " << param.source.ToString();
 }
 
+INSTANTIATE_TEST_SUITE_P(
+    NullLiteralTransformTests, NullLiteralTransformTest,
+    ::testing::Values(TransformParam{.str = "identity",
+                                     .source_type = iceberg::string(),
+                                     .source = Literal::Null(iceberg::string()),
+                                     .expected = Literal::Null(iceberg::string())},
+                      TransformParam{.str = "year",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Null(iceberg::timestamp()),
+                                     .expected = Literal::Null(iceberg::int32())},
+                      TransformParam{.str = "month",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Null(iceberg::timestamp()),
+                                     .expected = Literal::Null(iceberg::int32())},
+                      TransformParam{.str = "day",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Null(iceberg::timestamp()),
+                                     .expected = Literal::Null(iceberg::int32())},
+                      TransformParam{.str = "hour",
+                                     .source_type = iceberg::timestamp(),
+                                     .source = Literal::Null(iceberg::timestamp()),
+                                     .expected = Literal::Null(iceberg::int32())},
+                      TransformParam{.str = "void",
+                                     .source_type = iceberg::string(),
+                                     .source = Literal::Null(iceberg::string()),
+                                     .expected = Literal::Null(iceberg::string())},
+                      TransformParam{.str = "bucket[16]",
+                                     .source_type = iceberg::string(),
+                                     .source = Literal::Null(iceberg::string()),
+                                     .expected = Literal::Null(iceberg::int32())},
+                      TransformParam{.str = "truncate[32]",
+                                     .source_type = iceberg::string(),
+                                     .source = Literal::Null(iceberg::string()),
+                                     .expected = Literal::Null(iceberg::string())}));
+
 }  // namespace iceberg
diff --git a/src/iceberg/test/truncate_util_test.cc b/src/iceberg/test/truncate_util_test.cc
new file mode 100644
index 0000000..61010fc
--- /dev/null
+++ b/src/iceberg/test/truncate_util_test.cc
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/truncate_util.h"
+
+#include <gtest/gtest.h>
+
+#include "iceberg/expression/literal.h"
+
+namespace iceberg {
+
+// The following tests are from
+// https://iceberg.apache.org/spec/#truncate-transform-details
+TEST(TruncateUtilTest, TruncateLiteral) {
+  // Integer
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::Int(1), 10), Literal::Int(0));
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::Int(-1), 10), Literal::Int(-10));
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::Long(1), 10), Literal::Long(0));
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::Long(-1), 10), Literal::Long(-10));
+
+  // Decimal
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::Decimal(1065, 4, 2), 50),
+            Literal::Decimal(1050, 4, 2));
+
+  // String
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(Literal::String("iceberg"), 3),
+            Literal::String("ice"));
+
+  // Binary
+  std::string data = "\x01\x02\x03\x04\x05";
+  std::string expected = "\x01\x02\x03";
+  EXPECT_EQ(TruncateUtils::TruncateLiteral(
+                Literal::Binary(std::vector<uint8_t>(data.begin(), data.end())), 3),
+            Literal::Binary(std::vector<uint8_t>(expected.begin(), expected.end())));
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc
index fd9a165..e2f5ece 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -20,16 +20,14 @@
 #include "iceberg/transform_function.h"
 
 #include <cassert>
-#include <chrono>
-#include <type_traits>
-#include <utility>
-#include <variant>
 
 #include "iceberg/expression/literal.h"
 #include "iceberg/type.h"
-#include "iceberg/util/murmurhash3_internal.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/util/bucket_util.h"
+#include "iceberg/util/macros.h"
+#include "iceberg/util/temporal_util.h"
 #include "iceberg/util/truncate_util.h"
-#include "iceberg/util/uuid.h"
 
 namespace iceberg {
 
@@ -54,48 +52,14 @@ BucketTransform::BucketTransform(std::shared_ptr<Type> 
const& source_type,
     : TransformFunction(TransformType::kBucket, source_type), 
num_buckets_(num_buckets) {}
 
 Result<Literal> BucketTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply bucket transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
   if (literal.IsNull()) [[unlikely]] {
     return Literal::Null(int32());
   }
 
-  int32_t hash_value = 0;
-  std::visit(
-      [&](auto&& value) {
-        using T = std::decay_t<decltype(value)>;
-        if constexpr (std::is_same_v<T, int32_t>) {
-          MurmurHash3_x86_32(&value, sizeof(int32_t), 0, &hash_value);
-        } else if constexpr (std::is_same_v<T, int64_t>) {
-          MurmurHash3_x86_32(&value, sizeof(int64_t), 0, &hash_value);
-        } else if constexpr (std::is_same_v<T, std::array<uint8_t, 16>>) {
-          MurmurHash3_x86_32(value.data(), sizeof(uint8_t) * 16, 0, 
&hash_value);
-        } else if constexpr (std::is_same_v<T, std::string>) {
-          MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
-        } else if constexpr (std::is_same_v<T, Uuid>) {
-          MurmurHash3_x86_32(std::get<Uuid>(literal.value()).bytes().data(),
-                             Uuid::kLength, 0, &hash_value);
-        } else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
-          MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
-        } else if constexpr (std::is_same_v<T, std::monostate> ||
-                             std::is_same_v<T, bool> || std::is_same_v<T, 
float> ||
-                             std::is_same_v<T, double> ||
-                             std::is_same_v<T, Literal::BelowMin> ||
-                             std::is_same_v<T, Literal::AboveMax>) {
-          std::unreachable();
-        } else {
-          static_assert(false, "Unhandled type in BucketTransform::Transform");
-        }
-      },
-      literal.value());
-
-  // Calculate the bucket index
-  int32_t bucket_index =
-      (hash_value & std::numeric_limits<int32_t>::max()) % num_buckets_;
+  ICEBERG_ASSIGN_OR_RAISE(auto bucket_index,
+                          BucketUtils::BucketIndex(literal, num_buckets_))
 
   return Literal::Int(bucket_index);
 }
@@ -135,47 +99,9 @@ TruncateTransform::TruncateTransform(std::shared_ptr<Type> 
const& source_type,
     : TransformFunction(TransformType::kTruncate, source_type), width_(width) 
{}
 
 Result<Literal> TruncateTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply truncate transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
-  if (literal.IsNull()) [[unlikely]] {
-    // Return null as is
-    return literal;
-  }
-
-  switch (source_type()->type_id()) {
-    case TypeId::kInt: {
-      auto value = std::get<int32_t>(literal.value());
-      return Literal::Int(TruncateUtils::TruncateInteger(value, width_));
-    }
-    case TypeId::kLong: {
-      auto value = std::get<int64_t>(literal.value());
-      return Literal::Long(TruncateUtils::TruncateInteger(value, width_));
-    }
-    case TypeId::kDecimal: {
-      // TODO(zhjwpku): Handle decimal truncation logic here
-      return NotImplemented("Truncate for Decimal is not implemented yet");
-    }
-    case TypeId::kString: {
-      // Strings are truncated to a valid UTF-8 string with no more than L 
code points.
-      auto value = std::get<std::string>(literal.value());
-      return Literal::String(TruncateUtils::TruncateUTF8(std::move(value), 
width_));
-    }
-    case TypeId::kBinary: {
-      /// In contrast to strings, binary values do not have an assumed 
encoding and are
-      /// truncated to L bytes.
-      auto value = std::get<std::vector<uint8_t>>(literal.value());
-      if (value.size() > static_cast<size_t>(width_)) {
-        value.resize(width_);
-      }
-      return Literal::Binary(std::move(value));
-    }
-    default:
-      std::unreachable();
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
+  return TruncateUtils::TruncateLiteral(literal, width_);
 }
 
 std::shared_ptr<Type> TruncateTransform::ResultType() const { return 
source_type(); }
@@ -206,34 +132,9 @@ YearTransform::YearTransform(std::shared_ptr<Type> const& 
source_type)
     : TransformFunction(TransformType::kTruncate, source_type) {}
 
 Result<Literal> YearTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply year transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
-  if (literal.IsNull()) [[unlikely]] {
-    return Literal::Null(int32());
-  }
-
-  using namespace std::chrono;  // NOLINT
-  switch (source_type()->type_id()) {
-    case TypeId::kDate: {
-      auto value = std::get<int32_t>(literal.value());
-      auto epoch = sys_days(year{1970} / January / 1);
-      auto ymd = year_month_day(epoch + days{value});
-      return Literal::Int(static_cast<int32_t>(ymd.year()));
-    }
-    case TypeId::kTimestamp:
-    case TypeId::kTimestampTz: {
-      auto value = std::get<int64_t>(literal.value());
-      // Convert microseconds-since-epoch into a `year_month_day` object
-      auto ymd = 
year_month_day(floor<days>(sys_time<microseconds>(microseconds{value})));
-      return Literal::Int(static_cast<int32_t>(ymd.year()));
-    }
-    default:
-      std::unreachable();
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
+  return TemporalUtils::ExtractYear(literal);
 }
 
 std::shared_ptr<Type> YearTransform::ResultType() const { return int32(); }
@@ -259,46 +160,9 @@ MonthTransform::MonthTransform(std::shared_ptr<Type> 
const& source_type)
     : TransformFunction(TransformType::kMonth, source_type) {}
 
 Result<Literal> MonthTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply month transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
-  if (literal.IsNull()) [[unlikely]] {
-    return Literal::Null(int32());
-  }
-
-  using namespace std::chrono;  // NOLINT
-  switch (source_type()->type_id()) {
-    case TypeId::kDate: {
-      auto value = std::get<int32_t>(literal.value());
-      auto epoch = sys_days(year{1970} / January / 1);
-      auto ymd = year_month_day(epoch + days{value});
-      auto epoch_ymd = year_month_day(epoch);
-      auto delta = ymd.year() - epoch_ymd.year();
-      // Calculate the month as months from 1970-01
-      // Note: January is month 1, so we subtract 1 to get zero-based
-      // month count.
-      return Literal::Int(static_cast<int32_t>(delta.count() * 12 +
-                                               
static_cast<unsigned>(ymd.month()) - 1));
-    }
-    case TypeId::kTimestamp:
-    case TypeId::kTimestampTz: {
-      auto value = std::get<int64_t>(literal.value());
-      // Convert microseconds-since-epoch into a `year_month_day` object
-      auto ymd = 
year_month_day(floor<days>(sys_time<microseconds>(microseconds{value})));
-      auto epoch_ymd = year_month_day(year{1970} / January / 1);
-      auto delta = ymd.year() - epoch_ymd.year();
-      // Calculate the month as months from 1970-01
-      // Note: January is month 1, so we subtract 1 to get zero-based
-      // month count.
-      return Literal::Int(static_cast<int32_t>(delta.count() * 12 +
-                                               
static_cast<unsigned>(ymd.month()) - 1));
-    }
-    default:
-      std::unreachable();
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
+  return TemporalUtils::ExtractMonth(literal);
 }
 
 std::shared_ptr<Type> MonthTransform::ResultType() const { return int32(); }
@@ -324,34 +188,9 @@ DayTransform::DayTransform(std::shared_ptr<Type> const& 
source_type)
     : TransformFunction(TransformType::kDay, source_type) {}
 
 Result<Literal> DayTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply day transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
-  if (literal.IsNull()) [[unlikely]] {
-    return Literal::Null(int32());
-  }
-
-  using namespace std::chrono;  // NOLINT
-  switch (source_type()->type_id()) {
-    case TypeId::kDate: {
-      return Literal::Int(std::get<int32_t>(literal.value()));
-    }
-    case TypeId::kTimestamp:
-    case TypeId::kTimestampTz: {
-      auto value = std::get<int64_t>(literal.value());
-      // Convert microseconds to `sys_days` (chronological days since epoch)
-      auto timestamp = sys_time<microseconds>(microseconds{value});
-      auto days_since_epoch = floor<days>(timestamp);
-
-      return Literal::Int(
-          static_cast<int32_t>(days_since_epoch.time_since_epoch().count()));
-    }
-    default:
-      std::unreachable();
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
+  return TemporalUtils::ExtractDay(literal);
 }
 
 std::shared_ptr<Type> DayTransform::ResultType() const { return int32(); }
@@ -377,33 +216,9 @@ HourTransform::HourTransform(std::shared_ptr<Type> const& 
source_type)
     : TransformFunction(TransformType::kHour, source_type) {}
 
 Result<Literal> HourTransform::Transform(const Literal& literal) {
-  assert(literal.type() == source_type());
-  if (literal.IsBelowMin() || literal.IsAboveMax()) {
-    return InvalidArgument(
-        "Cannot apply hour transform to literal with value {} of type {}",
-        literal.ToString(), source_type()->ToString());
-  }
-
-  if (literal.IsNull()) [[unlikely]] {
-    return Literal::Null(int32());
-  }
-
-  using namespace std::chrono;  // NOLINT
-  switch (source_type()->type_id()) {
-    case TypeId::kTimestamp:
-    case TypeId::kTimestampTz: {
-      auto value = std::get<int64_t>(literal.value());
-      // Create a `sys_time` object from the microseconds value
-      auto timestamp = sys_time<microseconds>(microseconds{value});
-
-      // Convert the time since epoch directly into hours
-      auto hours_since_epoch = 
duration_cast<hours>(timestamp.time_since_epoch()).count();
-
-      return Literal::Int(static_cast<int32_t>(hours_since_epoch));
-    }
-    default:
-      std::unreachable();
-  }
+  ICEBERG_DCHECK(*literal.type() == *source_type(),
+                 "Literal type must match source type");
+  return TemporalUtils::ExtractHour(literal);
 }
 
 std::shared_ptr<Type> HourTransform::ResultType() const { return int32(); }
diff --git a/src/iceberg/transform_function.h b/src/iceberg/transform_function.h
index 165390b..fc0dd72 100644
--- a/src/iceberg/transform_function.h
+++ b/src/iceberg/transform_function.h
@@ -51,6 +51,9 @@ class ICEBERG_EXPORT BucketTransform : public 
TransformFunction {
   BucketTransform(std::shared_ptr<Type> const& source_type, int32_t 
num_buckets);
 
   /// \brief Applies the bucket hash function to the input Literal.
+  ///
+  /// Reference:
+  /// - https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements
   Result<Literal> Transform(const Literal& literal) override;
 
   /// \brief Returns INT32 as the output type.
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 3bd067d..4367448 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -115,6 +115,9 @@ class NameMapping;
 enum class SnapshotRefType;
 enum class TransformType;
 
+class Decimal;
+class Uuid;
+
 class Expression;
 class Literal;
 
diff --git a/src/iceberg/util/bucket_util.cc b/src/iceberg/util/bucket_util.cc
new file mode 100644
index 0000000..88b240d
--- /dev/null
+++ b/src/iceberg/util/bucket_util.cc
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/bucket_util.h"
+
+#include <utility>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/util/endian.h"
+#include "iceberg/util/murmurhash3_internal.h"
+
+namespace iceberg {
+
+namespace {
+// Per-type hashing helpers, selected by TypeId through DISPATCH_HASH_LITERAL.
+//
+// Primary template: fallback for type ids that have no specialization. Its
+// body is std::unreachable(), so it must never actually be invoked.
+template <TypeId type_id>
+int32_t HashLiteral(const Literal& literal) {
+  std::unreachable();
+}
+
+// kInt: hashes the stored int32_t with HashInt.
+template <>
+int32_t HashLiteral<TypeId::kInt>(const Literal& literal) {
+  return BucketUtils::HashInt(std::get<int32_t>(literal.value()));
+}
+
+// kDate: read as int32_t and hashed exactly like kInt.
+template <>
+int32_t HashLiteral<TypeId::kDate>(const Literal& literal) {
+  return BucketUtils::HashInt(std::get<int32_t>(literal.value()));
+}
+
+// kLong: hashes the stored int64_t with HashLong.
+template <>
+int32_t HashLiteral<TypeId::kLong>(const Literal& literal) {
+  return BucketUtils::HashLong(std::get<int64_t>(literal.value()));
+}
+
+// kTime: read as int64_t and hashed exactly like kLong.
+template <>
+int32_t HashLiteral<TypeId::kTime>(const Literal& literal) {
+  return BucketUtils::HashLong(std::get<int64_t>(literal.value()));
+}
+
+// kTimestamp: read as int64_t and hashed exactly like kLong.
+template <>
+int32_t HashLiteral<TypeId::kTimestamp>(const Literal& literal) {
+  return BucketUtils::HashLong(std::get<int64_t>(literal.value()));
+}
+
+// kTimestampTz: read as int64_t and hashed exactly like kLong.
+template <>
+int32_t HashLiteral<TypeId::kTimestampTz>(const Literal& literal) {
+  return BucketUtils::HashLong(std::get<int64_t>(literal.value()));
+}
+
+// kDecimal: hashes the byte sequence returned by Decimal::ToBigEndian().
+template <>
+int32_t HashLiteral<TypeId::kDecimal>(const Literal& literal) {
+  const auto& decimal = std::get<Decimal>(literal.value());
+  return BucketUtils::HashBytes(decimal.ToBigEndian());
+}
+
+// kString: hashes the string's bytes as-is (data() .. data() + size()).
+template <>
+int32_t HashLiteral<TypeId::kString>(const Literal& literal) {
+  const auto& str = std::get<std::string>(literal.value());
+  return BucketUtils::HashBytes(
+      std::span<const uint8_t>(reinterpret_cast<const uint8_t*>(str.data()), 
str.size()));
+}
+
+// kUuid: hashes the bytes exposed by Uuid::bytes().
+template <>
+int32_t HashLiteral<TypeId::kUuid>(const Literal& literal) {
+  const auto& uuid = std::get<Uuid>(literal.value());
+  return BucketUtils::HashBytes(uuid.bytes());
+}
+
+// kBinary: hashes the stored byte vector.
+template <>
+int32_t HashLiteral<TypeId::kBinary>(const Literal& literal) {
+  const auto& binary = std::get<std::vector<uint8_t>>(literal.value());
+  return BucketUtils::HashBytes(binary);
+}
+
+// kFixed: read as a byte vector and hashed exactly like kBinary.
+template <>
+int32_t HashLiteral<TypeId::kFixed>(const Literal& literal) {
+  const auto& fixed = std::get<std::vector<uint8_t>>(literal.value());
+  return BucketUtils::HashBytes(fixed);
+}
+
+}  // namespace
+
+// Passes the whole span to MurmurHash3_x86_32 and returns the 32-bit value it
+// writes. The third argument (0) is presumably the hash seed -- confirm against
+// murmurhash3_internal.h.
+int32_t BucketUtils::HashBytes(std::span<const uint8_t> bytes) {
+  int32_t hash_value = 0;
+  MurmurHash3_x86_32(bytes.data(), bytes.size(), 0, &hash_value);
+  return hash_value;
+}
+
+// Hashes the sizeof(int64_t) bytes of `value` with MurmurHash3_x86_32.
+int32_t BucketUtils::HashLong(int64_t value) {
+  int32_t hash_value = 0;
+  // The value goes through ToLittleEndian before its bytes are hashed;
+  // presumably this keeps the hash independent of the host byte order.
+  value = ToLittleEndian(value);
+  MurmurHash3_x86_32(&value, sizeof(int64_t), 0, &hash_value);
+  return hash_value;
+}
+
+// Expands to one `case` of a switch over TypeId: it stores
+// HashLiteral<TYPE_ID>(literal) in `hash_value` and breaks. Both `literal` and
+// `hash_value` must be in scope where the macro is expanded.
+#define DISPATCH_HASH_LITERAL(TYPE_ID)          \
+  case TYPE_ID:                                 \
+    hash_value = HashLiteral<TYPE_ID>(literal); \
+    break;
+
+// Computes (hash(literal) & INT32_MAX) % num_buckets.
+//
+// Errors:
+//   - InvalidArgument when num_buckets is zero or negative.
+//   - NotSupported for AboveMax/BelowMin literals and for any type id that has
+//     no dispatch case below.
+//
+// NOTE(review): null literals are not checked here. BucketTransform::Transform
+// returns early for them before calling this function -- confirm that any other
+// caller does the same.
+Result<int32_t> BucketUtils::BucketIndex(const Literal& literal, int32_t 
num_buckets) {
+  if (num_buckets <= 0) [[unlikely]] {
+    return InvalidArgument("Number of buckets must be positive, got {}", 
num_buckets);
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot compute bucket index for {}", 
literal.ToString());
+  }
+
+  // Pick the hashing routine from the literal's own type id.
+  int32_t hash_value = 0;
+  switch (literal.type()->type_id()) {
+    DISPATCH_HASH_LITERAL(TypeId::kInt)
+    DISPATCH_HASH_LITERAL(TypeId::kDate)
+    DISPATCH_HASH_LITERAL(TypeId::kLong)
+    DISPATCH_HASH_LITERAL(TypeId::kTime)
+    DISPATCH_HASH_LITERAL(TypeId::kTimestamp)
+    DISPATCH_HASH_LITERAL(TypeId::kTimestampTz)
+    DISPATCH_HASH_LITERAL(TypeId::kDecimal)
+    DISPATCH_HASH_LITERAL(TypeId::kString)
+    DISPATCH_HASH_LITERAL(TypeId::kUuid)
+    DISPATCH_HASH_LITERAL(TypeId::kBinary)
+    DISPATCH_HASH_LITERAL(TypeId::kFixed)
+    default:
+      return NotSupported("Hashing not supported for type {}",
+                          literal.type()->ToString());
+  }
+
+  // Masking with INT32_MAX clears the sign bit, so the modulo operand (and
+  // therefore the result) is never negative.
+  return (hash_value & std::numeric_limits<int32_t>::max()) % num_buckets;
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/bucket_util.h b/src/iceberg/util/bucket_util.h
new file mode 100644
index 0000000..31a574b
--- /dev/null
+++ b/src/iceberg/util/bucket_util.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <span>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief Static helpers that hash values with MurmurHash3 and map literals to
+/// bucket indexes.
+class ICEBERG_EXPORT BucketUtils {
+ public:
+  /// \brief Hash a 32-bit integer using MurmurHash3 and return a 32-bit hash
+  /// value.
+  ///
+  /// The value is widened to int64_t and passed to HashLong.
+  /// \param value The input integer to hash.
+  /// \note Integer and long hash results must be identical for all integer
+  /// values. This ensures that schema evolution does not change bucket
+  /// partition values if integer types are promoted.
+  /// \return A 32-bit hash value.
+  static inline int32_t HashInt(int32_t value) {
+    return HashLong(static_cast<int64_t>(value));
+  }
+
+  /// \brief Hash a 64-bit integer using MurmurHash3 and return a 32-bit hash
+  /// value.
+  /// \param value The input long to hash.
+  /// \return A 32-bit hash value.
+  static int32_t HashLong(int64_t value);
+
+  /// \brief Hash a byte array using MurmurHash3 and return a 32-bit hash value.
+  /// \param bytes The input byte array to hash.
+  /// \return A 32-bit hash value.
+  static int32_t HashBytes(std::span<const uint8_t> bytes);
+
+  /// \brief Compute the bucket index for a given literal and number of buckets.
+  /// \param literal The input literal to hash.
+  /// \param num_buckets The number of buckets to hash into.
+  /// \return (murmur3_x86_32_hash(literal) & Integer.MAX_VALUE) % num_buckets,
+  /// or an error status when the index cannot be computed.
+  static Result<int32_t> BucketIndex(const Literal& literal, int32_t 
num_buckets);
+};
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc
index e12e481..0cc7c55 100644
--- a/src/iceberg/util/conversions.cc
+++ b/src/iceberg/util/conversions.cc
@@ -23,6 +23,7 @@
 #include <span>
 #include <string>
 
+#include "iceberg/util/decimal.h"
 #include "iceberg/util/endian.h"
 #include "iceberg/util/macros.h"
 #include "iceberg/util/uuid.h"
@@ -64,6 +65,12 @@ Result<std::vector<uint8_t>> 
ToBytesImpl<TypeId::kBoolean>(const Literal::Value&
                                                     : 
static_cast<uint8_t>(0x00)};
 }
 
+// Decimal literals serialize to the byte sequence returned by
+// Decimal::ToBigEndian().
+template <>
+Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kDecimal>(const 
Literal::Value& value) {
+  const auto& decimal = std::get<Decimal>(value);
+  return decimal.ToBigEndian();
+}
+
 template <>
 Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kString>(const 
Literal::Value& value) {
   const auto& str = std::get<std::string>(value);
@@ -95,6 +102,7 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const 
PrimitiveType& type,
   const auto type_id = type.type_id();
 
   switch (type_id) {
+    DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kInt)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kDate)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kLong)
@@ -103,12 +111,11 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const 
PrimitiveType& type,
     DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
-    DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)
+    DISPATCH_LITERAL_TO_BYTES(TypeId::kDecimal)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kString)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kUuid)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed)
-      // TODO(Li Feiyang): Add support for Decimal
 
     default:
       return NotSupported("Serialization for type {} is not supported", 
type.ToString());
@@ -177,6 +184,11 @@ Result<Literal::Value> Conversions::FromBytes(const 
PrimitiveType& type,
         return Literal::Value{double_value};
       }
     }
+    case TypeId::kDecimal: {
+      ICEBERG_ASSIGN_OR_RAISE(auto decimal,
+                              Decimal::FromBigEndian(data.data(), 
data.size()));
+      return Literal::Value{decimal};
+    }
     case TypeId::kString:
       return Literal::Value{
           std::string(reinterpret_cast<const char*>(data.data()), 
data.size())};
@@ -194,7 +206,6 @@ Result<Literal::Value> Conversions::FromBytes(const 
PrimitiveType& type,
       }
       return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
     }
-    // TODO(Li Feiyang): Add support for Decimal
     default:
       return NotSupported("Deserialization for type {} is not supported",
                           type.ToString());
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 5018574..f33d932 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -24,12 +24,12 @@
 
 #include "iceberg/util/decimal.h"
 
+#include <algorithm>
 #include <bit>
 #include <charconv>
 #include <climits>
 #include <cmath>
 #include <cstring>
-#include <format>
 #include <iomanip>
 #include <limits>
 #include <sstream>
@@ -44,6 +44,16 @@ namespace iceberg {
 
 namespace {
 
+constexpr int32_t kMinDecimalBytes = 1;
+constexpr int32_t kMaxDecimalBytes = 16;
+
+// The maximum decimal value that can be represented with kMaxPrecision digits.
+// 10^38 - 1
+constexpr Decimal kMaxDecimalValue(5421010862427522170LL, 
687399551400673279ULL);
+// The minimum decimal value that can be represented with kMaxPrecision digits.
+// - (10^38 - 1)
+constexpr Decimal kMinDecimalValue(-5421010862427522171LL, 
17759344522308878337ULL);
+
 struct DecimalComponents {
   std::string_view while_digits;
   std::string_view fractional_digits;
@@ -275,8 +285,15 @@ bool RescaleWouldCauseDataLoss(const Decimal& value, 
int32_t delta_scale,
     return res->second != 0;
   }
 
+  auto max_safe_value = kMaxDecimalValue / multiplier;
+  auto min_safe_value = kMinDecimalValue / multiplier;
+  if (value > max_safe_value || value < min_safe_value) {
+    // Overflow would happen — treat as data loss
+    return true;
+  }
+
   *result = value * multiplier;
-  return (value < 0) ? *result > value : *result < value;
+  return false;
 }
 
 }  // namespace
@@ -470,11 +487,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, 
int32_t* precision,
 }
 
 Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
-  static constexpr int32_t kMinDecimalBytes = 1;
-  static constexpr int32_t kMaxDecimalBytes = 16;
-
-  int64_t high, low;
-
   if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
     return InvalidArgument(
         "Decimal::FromBigEndian: length must be in the range [{}, {}], was {}",
@@ -486,7 +498,8 @@ Result<Decimal> Decimal::FromBigEndian(const uint8_t* 
bytes, int32_t length) {
   const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
 
   uint128_t result = 0;
-  std::memcpy(reinterpret_cast<uint8_t*>(&result) + 16 - length, bytes, 
length);
+  std::memcpy(reinterpret_cast<uint8_t*>(&result) + kMaxDecimalBytes - length, 
bytes,
+              length);
 
   if constexpr (std::endian::native == std::endian::little) {
     auto high = static_cast<uint64_t>(result >> 64);
@@ -505,6 +518,36 @@ Result<Decimal> Decimal::FromBigEndian(const uint8_t* 
bytes, int32_t length) {
   return Decimal(static_cast<int128_t>(result));
 }
 
+// Returns the value as big-endian two's-complement bytes with redundant leading
+// sign-extension bytes removed. The result always has at least one byte and at
+// most kMaxDecimalBytes.
+std::vector<uint8_t> Decimal::ToBigEndian() const {
+  std::vector<uint8_t> bytes(kMaxDecimalBytes);
+
+  // Copy the 128-bit value in native byte order ...
+  auto uvalue = static_cast<uint128_t>(data_);
+  std::memcpy(bytes.data(), &uvalue, kMaxDecimalBytes);
+
+  // ... and reverse it on little-endian hosts so bytes[0] is the most
+  // significant byte.
+  if constexpr (std::endian::native == std::endian::little) {
+    std::ranges::reverse(bytes);
+  }
+
+  auto is_negative = data_ < 0;
+  // Number of trailing (least significant) bytes to keep. The loop stops one
+  // short of the last byte, so `keep` never drops below 1.
+  int keep = kMaxDecimalBytes;
+  for (int32_t i = 0; i < kMaxDecimalBytes - 1; ++i) {
+    uint8_t byte = bytes[i];
+    uint8_t next = bytes[i + 1];
+    // A leading byte is pure sign extension, and is dropped, when it is 0xff
+    // and the next byte has its sign bit set (negative values), or when it is
+    // 0x00 and the next byte has its sign bit clear (non-negative values).
+    // Otherwise the byte carries information and scanning stops.
+    if ((is_negative && byte == 0xff && (next & 0x80)) ||
+        (!is_negative && byte == 0x00 && !(next & 0x80))) {
+      --keep;
+    } else {
+      break;
+    }
+  }
+
+  // Remove the dropped prefix so only the last `keep` bytes remain.
+  bytes.erase(bytes.begin(), bytes.begin() + (kMaxDecimalBytes - keep));
+  return bytes;
+}
+
 Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
   if (orig_scale == new_scale) {
     return *this;
@@ -518,10 +561,7 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, 
int32_t new_scale) const {
 
   auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
 
-  const bool rescale_would_cause_data_loss =
-      RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out);
-
-  if (rescale_would_cause_data_loss) {
+  if (RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out)) 
[[unlikely]] {
     return Invalid("Rescale {} from {} to {} would cause data loss", 
ToIntegerString(),
                    orig_scale, new_scale);
   }
@@ -534,6 +574,52 @@ bool Decimal::FitsInPrecision(int32_t precision) const {
   return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision];
 }
 
+// Orders two unscaled decimal values that may carry different scales.
+//
+// lhs/rhs are the unscaled values and lhs_scale/rhs_scale their scales. The
+// operand with the smaller scale is rescaled to the larger one before the two
+// are compared; if that rescale is reported as data loss, the result is decided
+// from the operands' sign alone.
+std::partial_ordering Decimal::Compare(const Decimal& lhs, const Decimal& rhs,
+                                       int32_t lhs_scale, int32_t rhs_scale) {
+  // Same scale, or one side is zero: comparing the unscaled values is enough.
+  if (lhs_scale == rhs_scale || lhs.data_ == 0 || rhs.data_ == 0) {
+    return lhs <=> rhs;
+  }
+
+  // If one is negative and the other is positive, the positive is greater.
+  if (lhs.data_ < 0 && rhs.data_ > 0) {
+    return std::partial_ordering::less;
+  }
+  if (lhs.data_ > 0 && rhs.data_ < 0) {
+    return std::partial_ordering::greater;
+  }
+
+  // From here on both operands are non-zero with the same sign; `negative` is
+  // true when both are negative.
+  bool negative = lhs.data_ < 0 && rhs.data_ < 0;
+
+  const int32_t delta_scale = lhs_scale - rhs_scale;
+  const int32_t abs_delta_scale = std::abs(delta_scale);
+
+  // NOTE(review): abs_delta_scale indexes kDecimal128PowersOfTen below and is
+  // only guarded by this ICEBERG_DCHECK -- confirm callers always pass scales
+  // whose difference is within kMaxScale.
+  ICEBERG_DCHECK(abs_delta_scale <= kMaxScale, "");
+
+  const auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
+
+  Decimal adjusted_lhs;
+  Decimal adjusted_rhs;
+
+  if (delta_scale < 0) {
+    // lhs_scale < rhs_scale: rescale lhs (positive delta) to rhs's scale. If
+    // that is reported as data loss, lhs is treated as the larger magnitude:
+    // less than rhs when both are negative, greater otherwise.
+    if (RescaleWouldCauseDataLoss(lhs, -delta_scale, multiplier, 
&adjusted_lhs))
+        [[unlikely]] {
+      return negative ? std::partial_ordering::less : 
std::partial_ordering::greater;
+    }
+    adjusted_rhs = rhs;
+  } else {
+    // lhs_scale > rhs_scale: rescale rhs to lhs's scale. If that is reported
+    // as data loss, rhs is treated as the larger magnitude: lhs is greater
+    // when both are negative, less otherwise.
+    if (RescaleWouldCauseDataLoss(rhs, delta_scale, multiplier, &adjusted_rhs))
+        [[unlikely]] {
+      return negative ? std::partial_ordering::greater : 
std::partial_ordering::less;
+    }
+    adjusted_lhs = lhs;
+  }
+
+  // Both operands now share a scale, so their unscaled values compare directly.
+  return adjusted_lhs <=> adjusted_rhs;
+}
+
 std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   std::array<uint8_t, kByteWidth> out{{0}};
   std::memcpy(out.data(), &data_, kByteWidth);
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 7e9cd7c..b7f57f4 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -25,11 +25,13 @@
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
 #include <array>
+#include <compare>
 #include <cstdint>
 #include <iosfwd>
 #include <string>
 #include <string_view>
 #include <type_traits>
+#include <vector>
 
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
@@ -142,7 +144,7 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
   /// \brief Convert the Decimal value to a base 10 decimal string with the 
given scale.
   /// \param scale The scale to use for the string representation.
   /// \return The string representation of the Decimal value.
-  Result<std::string> ToString(int32_t scale = 0) const;
+  Result<std::string> ToString(int32_t scale) const;
 
   /// \brief Convert the Decimal value to an integer string.
   std::string ToIntegerString() const;
@@ -164,6 +166,11 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
   /// \return error status if the length is an invalid value
   static Result<Decimal> FromBigEndian(const uint8_t* data, int32_t length);
 
+  /// \brief Convert Decimal's unscaled value to two’s-complement big-endian 
binary, using
+  ///        the minimum number of bytes for the value.
+  /// \return A vector containing the big-endian bytes.
+  std::vector<uint8_t> ToBigEndian() const;
+
   /// \brief Convert Decimal from one scale to another.
   Result<Decimal> Rescale(int32_t orig_scale, int32_t new_scale) const;
 
@@ -180,6 +187,10 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
     return low() <=> other.low();
   }
 
+  /// \brief Compare two Decimals with different scales.
+  static std::partial_ordering Compare(const Decimal& lhs, const Decimal& rhs,
+                                       int32_t lhs_scale, int32_t rhs_scale);
+
   const uint8_t* native_endian_bytes() const {
     return reinterpret_cast<const uint8_t*>(&data_);
   }
diff --git a/src/iceberg/util/temporal_util.cc 
b/src/iceberg/util/temporal_util.cc
new file mode 100644
index 0000000..41748c9
--- /dev/null
+++ b/src/iceberg/util/temporal_util.cc
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/temporal_util.h"
+
+#include <chrono>
+#include <utility>
+
+#include "iceberg/expression/literal.h"
+
+namespace iceberg {
+
+namespace {
+
+using namespace std::chrono;  // NOLINT
+
+constexpr auto kEpochYmd = year{1970} / January / 1;  // calendar date 1970-01-01
+constexpr auto kEpochDays = sys_days(kEpochYmd);  // the same date as a day-precision time point
+
+/// \brief Convert a day count relative to 1970-01-01 into a calendar date.
+inline constexpr year_month_day DateToYmd(int32_t days_since_epoch) {
+  const sys_days day_point = kEpochDays + days{days_since_epoch};
+  return year_month_day{day_point};
+}
+
+/// \brief Convert microseconds relative to the epoch into the calendar date of the
+/// day containing that instant (the instant is floored to whole days).
+inline constexpr year_month_day TimestampToYmd(int64_t micros_since_epoch) {
+  const sys_time<microseconds> instant{microseconds{micros_since_epoch}};
+  return year_month_day{floor<days>(instant)};
+}
+
+/// \brief Count whole `Duration` units (days or hours) between the epoch and the
+/// given instant, flooring so that instants before the epoch round downwards.
+/// \param micros_since_epoch Microseconds relative to the epoch.
+/// \return The floored unit count, narrowed to int32_t.
+template <typename Duration>
+  requires std::is_same_v<Duration, days> || std::is_same_v<Duration, hours>
+inline constexpr int32_t TimestampToDuration(int64_t micros_since_epoch) {
+  const microseconds since_epoch{micros_since_epoch};
+  const auto whole_units = floor<Duration>(since_epoch);
+  return static_cast<int32_t>(whole_units.count());
+}
+
+/// \brief Number of calendar months between 1970-01 and the month of `ymd`.
+inline constexpr int32_t MonthsSinceEpoch(const year_month_day& ymd) {
+  const auto year_offset = (ymd.year() - kEpochYmd.year()).count();
+  // month() is 1-based (January == 1); the trailing -1 makes it zero-based.
+  const auto month_number = static_cast<unsigned>(ymd.month());
+  return static_cast<int32_t>(year_offset * 12 + month_number - 1);
+}
+
+/// \brief Whole calendar years between 1970 and the year of `ymd`
+/// (0 for 1970, negative for earlier years).
+inline constexpr int32_t YearsSinceEpoch(const year_month_day& ymd) {
+  return static_cast<int32_t>((ymd.year() - kEpochYmd.year()).count());
+}
+
+/// \brief Fallback for type ids that have no year specialization.
+///
+/// TemporalUtils::ExtractYear only dispatches to the explicit specializations
+/// below, so this primary template is not expected to be reached.
+template <TypeId type_id>
+Result<Literal> ExtractYearImpl(const Literal& literal) {
+  std::unreachable();
+}
+
+/// \brief Year of a date literal (int32 days since epoch), as years from 1970.
+template <>
+Result<Literal> ExtractYearImpl<TypeId::kDate>(const Literal& literal) {
+  auto value = std::get<int32_t>(literal.value());
+  // Return the offset from 1970 rather than the calendar year, matching the
+  // documented contract and the way MonthsSinceEpoch is computed.
+  return Literal::Int(YearsSinceEpoch(DateToYmd(value)));
+}
+
+/// \brief Year of a timestamp literal (int64 microseconds since epoch), as years
+/// from 1970.
+template <>
+Result<Literal> ExtractYearImpl<TypeId::kTimestamp>(const Literal& literal) {
+  auto value = std::get<int64_t>(literal.value());
+  return Literal::Int(YearsSinceEpoch(TimestampToYmd(value)));
+}
+
+/// \brief Timestamps with time zone are handled exactly like plain timestamps.
+template <>
+Result<Literal> ExtractYearImpl<TypeId::kTimestampTz>(const Literal& literal) {
+  return ExtractYearImpl<TypeId::kTimestamp>(literal);
+}
+
+/// \brief Fallback for type ids that have no month specialization.
+///
+/// TemporalUtils::ExtractMonth only dispatches to the explicit specializations
+/// below, so this primary template is not expected to be reached.
+template <TypeId type_id>
+Result<Literal> ExtractMonthImpl(const Literal& literal) {
+  std::unreachable();
+}
+
+/// \brief Months from 1970-01 for a date literal (int32 days since epoch).
+template <>
+Result<Literal> ExtractMonthImpl<TypeId::kDate>(const Literal& literal) {
+  const int32_t days_since_epoch = std::get<int32_t>(literal.value());
+  return Literal::Int(MonthsSinceEpoch(DateToYmd(days_since_epoch)));
+}
+
+/// \brief Months from 1970-01 for a timestamp literal (int64 microseconds since
+/// epoch).
+template <>
+Result<Literal> ExtractMonthImpl<TypeId::kTimestamp>(const Literal& literal) {
+  const int64_t micros_since_epoch = std::get<int64_t>(literal.value());
+  return Literal::Int(MonthsSinceEpoch(TimestampToYmd(micros_since_epoch)));
+}
+
+/// \brief Timestamps with time zone are handled exactly like plain timestamps.
+template <>
+Result<Literal> ExtractMonthImpl<TypeId::kTimestampTz>(const Literal& literal) {
+  return ExtractMonthImpl<TypeId::kTimestamp>(literal);
+}
+
+/// \brief Fallback for type ids that have no day specialization.
+///
+/// TemporalUtils::ExtractDay only dispatches to the explicit specializations
+/// below, so this primary template is not expected to be reached.
+template <TypeId type_id>
+Result<Literal> ExtractDayImpl(const Literal& literal) {
+  std::unreachable();
+}
+
+/// \brief A date literal already stores its day count, so it is passed through
+/// as an int literal.
+template <>
+Result<Literal> ExtractDayImpl<TypeId::kDate>(const Literal& literal) {
+  const int32_t days_since_epoch = std::get<int32_t>(literal.value());
+  return Literal::Int(days_since_epoch);
+}
+
+/// \brief Whole days from the epoch for a timestamp literal (int64 microseconds).
+template <>
+Result<Literal> ExtractDayImpl<TypeId::kTimestamp>(const Literal& literal) {
+  const int64_t micros_since_epoch = std::get<int64_t>(literal.value());
+  return Literal::Int(TimestampToDuration<days>(micros_since_epoch));
+}
+
+/// \brief Timestamps with time zone are handled exactly like plain timestamps.
+template <>
+Result<Literal> ExtractDayImpl<TypeId::kTimestampTz>(const Literal& literal) {
+  return ExtractDayImpl<TypeId::kTimestamp>(literal);
+}
+
+/// \brief Fallback for type ids that have no hour specialization.
+///
+/// TemporalUtils::ExtractHour only dispatches to the explicit specializations
+/// below, so this primary template is not expected to be reached.
+template <TypeId type_id>
+Result<Literal> ExtractHourImpl(const Literal& literal) {
+  std::unreachable();
+}
+
+/// \brief Whole hours from the epoch for a timestamp literal (int64 microseconds).
+template <>
+Result<Literal> ExtractHourImpl<TypeId::kTimestamp>(const Literal& literal) {
+  const int64_t micros_since_epoch = std::get<int64_t>(literal.value());
+  return Literal::Int(TimestampToDuration<hours>(micros_since_epoch));
+}
+
+/// \brief Timestamps with time zone are handled exactly like plain timestamps.
+template <>
+Result<Literal> ExtractHourImpl<TypeId::kTimestampTz>(const Literal& literal) {
+  return ExtractHourImpl<TypeId::kTimestamp>(literal);
+}
+
+}  // namespace
+
+/// \brief Extract the year component of a date, timestamp or timestamptz literal.
+/// \param literal The literal to transform.
+/// \return A null int literal for a null input; NotSupported for above-max /
+/// below-min literals and for any other type; otherwise the result of the
+/// matching ExtractYearImpl specialization.
+Result<Literal> TemporalUtils::ExtractYear(const Literal& literal) {
+  if (literal.IsNull()) [[unlikely]] {
+    return Literal::Null(int32());
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot extract year from {}", literal.ToString());
+  }
+
+  switch (literal.type()->type_id()) {
+    case TypeId::kDate:
+      return ExtractYearImpl<TypeId::kDate>(literal);
+    case TypeId::kTimestamp:
+      return ExtractYearImpl<TypeId::kTimestamp>(literal);
+    case TypeId::kTimestampTz:
+      return ExtractYearImpl<TypeId::kTimestampTz>(literal);
+    default:
+      break;
+  }
+  return NotSupported("Extract year from type {} is not supported",
+                      literal.type()->ToString());
+}
+
+/// \brief Extract the month component of a date, timestamp or timestamptz literal.
+/// \param literal The literal to transform.
+/// \return A null int literal for a null input; NotSupported for above-max /
+/// below-min literals and for any other type; otherwise the result of the
+/// matching ExtractMonthImpl specialization.
+Result<Literal> TemporalUtils::ExtractMonth(const Literal& literal) {
+  if (literal.IsNull()) [[unlikely]] {
+    return Literal::Null(int32());
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot extract month from {}", literal.ToString());
+  }
+
+  switch (literal.type()->type_id()) {
+    case TypeId::kDate:
+      return ExtractMonthImpl<TypeId::kDate>(literal);
+    case TypeId::kTimestamp:
+      return ExtractMonthImpl<TypeId::kTimestamp>(literal);
+    case TypeId::kTimestampTz:
+      return ExtractMonthImpl<TypeId::kTimestampTz>(literal);
+    default:
+      break;
+  }
+  return NotSupported("Extract month from type {} is not supported",
+                      literal.type()->ToString());
+}
+
+/// \brief Extract the day component of a date, timestamp or timestamptz literal.
+/// \param literal The literal to transform.
+/// \return A null int literal for a null input; NotSupported for above-max /
+/// below-min literals and for any other type; otherwise the result of the
+/// matching ExtractDayImpl specialization.
+Result<Literal> TemporalUtils::ExtractDay(const Literal& literal) {
+  if (literal.IsNull()) [[unlikely]] {
+    return Literal::Null(int32());
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot extract day from {}", literal.ToString());
+  }
+
+  switch (literal.type()->type_id()) {
+    case TypeId::kDate:
+      return ExtractDayImpl<TypeId::kDate>(literal);
+    case TypeId::kTimestamp:
+      return ExtractDayImpl<TypeId::kTimestamp>(literal);
+    case TypeId::kTimestampTz:
+      return ExtractDayImpl<TypeId::kTimestampTz>(literal);
+    default:
+      break;
+  }
+  return NotSupported("Extract day from type {} is not supported",
+                      literal.type()->ToString());
+}
+
+/// \brief Extract the hour component of a timestamp or timestamptz literal.
+/// \param literal The literal to transform.
+/// \return A null int literal for a null input; NotSupported for above-max /
+/// below-min literals and for any other type (dates included); otherwise the
+/// result of the matching ExtractHourImpl specialization.
+Result<Literal> TemporalUtils::ExtractHour(const Literal& literal) {
+  if (literal.IsNull()) [[unlikely]] {
+    return Literal::Null(int32());
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot extract hour from {}", literal.ToString());
+  }
+
+  switch (literal.type()->type_id()) {
+    case TypeId::kTimestamp:
+      return ExtractHourImpl<TypeId::kTimestamp>(literal);
+    case TypeId::kTimestampTz:
+      return ExtractHourImpl<TypeId::kTimestampTz>(literal);
+    default:
+      break;
+  }
+  return NotSupported("Extract hour from type {} is not supported",
+                      literal.type()->ToString());
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/temporal_util.h b/src/iceberg/util/temporal_util.h
new file mode 100644
index 0000000..750c3d8
--- /dev/null
+++ b/src/iceberg/util/temporal_util.h
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+class ICEBERG_EXPORT TemporalUtils {
+ public:
+  /// \brief Extract a date or timestamp year, as years from 1970 (null input yields a null int literal; other types yield NotSupported)
+  static Result<Literal> ExtractYear(const Literal& literal);
+
+  /// \brief Extract a date or timestamp month, as months from 1970-01-01 (null input yields a null int literal; other types yield NotSupported)
+  static Result<Literal> ExtractMonth(const Literal& literal);
+
+  /// \brief Extract a date or timestamp day, as days from 1970-01-01 (null input yields a null int literal; other types yield NotSupported)
+  static Result<Literal> ExtractDay(const Literal& literal);
+
+  /// \brief Extract a timestamp hour, as hours from 1970-01-01 00:00:00 (null input yields a null int literal; dates and other types yield NotSupported)
+  static Result<Literal> ExtractHour(const Literal& literal);
+};
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/truncate_util.cc 
b/src/iceberg/util/truncate_util.cc
new file mode 100644
index 0000000..9d0c6e7
--- /dev/null
+++ b/src/iceberg/util/truncate_util.cc
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/truncate_util.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "iceberg/expression/literal.h"
+#include "iceberg/util/checked_cast.h"
+
+namespace iceberg {
+
+namespace {
+/// \brief Fallback for type ids that have no truncate specialization.
+///
+/// TruncateUtils::TruncateLiteral only dispatches to the explicit specializations
+/// below, so this primary template is not expected to be reached.
+template <TypeId type_id>
+Literal TruncateLiteralImpl(const Literal& literal, int32_t width) {
+  std::unreachable();
+}
+
+/// \brief Truncate an int literal with TruncateUtils::TruncateInteger.
+template <>
+Literal TruncateLiteralImpl<TypeId::kInt>(const Literal& literal, int32_t width) {
+  const int32_t value = std::get<int32_t>(literal.value());
+  const int32_t truncated = TruncateUtils::TruncateInteger(value, width);
+  return Literal::Int(truncated);
+}
+
+/// \brief Truncate a long literal with TruncateUtils::TruncateInteger.
+template <>
+Literal TruncateLiteralImpl<TypeId::kLong>(const Literal& literal, int32_t width) {
+  const int64_t value = std::get<int64_t>(literal.value());
+  const int64_t truncated = TruncateUtils::TruncateInteger(value, width);
+  return Literal::Long(truncated);
+}
+
+/// \brief Truncate a decimal literal, keeping the precision and scale of its type.
+template <>
+Literal TruncateLiteralImpl<TypeId::kDecimal>(const Literal& literal, int32_t width) {
+  const auto& source = std::get<Decimal>(literal.value());
+  auto decimal_type = internal::checked_pointer_cast<DecimalType>(literal.type());
+  const auto truncated_value = TruncateUtils::TruncateDecimal(source, width).value();
+  return Literal::Decimal(truncated_value, decimal_type->precision(),
+                          decimal_type->scale());
+}
+
+/// \brief Truncate a string literal to a valid UTF-8 string with no more than
+/// `width` code points.
+template <>
+Literal TruncateLiteralImpl<TypeId::kString>(const Literal& literal, int32_t width) {
+  const auto& text = std::get<std::string>(literal.value());
+  return Literal::String(TruncateUtils::TruncateUTF8(text, width));
+}
+
+/// \brief Truncate a binary literal to its first `width` bytes.
+///
+/// In contrast to strings, binary values do not have an assumed encoding, so the
+/// cut is made on raw bytes. Values that already fit are returned unchanged.
+template <>
+Literal TruncateLiteralImpl<TypeId::kBinary>(const Literal& literal, int32_t width) {
+  const auto& bytes = std::get<std::vector<uint8_t>>(literal.value());
+  // Spell out the signed-to-unsigned conversion the size comparison performs.
+  const auto limit = static_cast<std::size_t>(width);
+  if (bytes.size() > limit) {
+    return Literal::Binary(std::vector<uint8_t>(bytes.begin(), bytes.begin() + width));
+  }
+  return literal;
+}
+
+}  // namespace
+
+/// \brief Truncate a Decimal using decimal - (((decimal % width) + width) % width).
+///
+/// Adding `width` before the second remainder keeps the subtracted amount
+/// non-negative when `decimal % width` is negative.
+Decimal TruncateUtils::TruncateDecimal(const Decimal& decimal, int32_t width) {
+  const auto non_negative_remainder = ((decimal % width) + width) % width;
+  return decimal - non_negative_remainder;
+}
+
+/// \brief Truncate an int, long, decimal, string or binary literal to `width`.
+/// \param literal The literal to truncate.
+/// \param width The truncation width forwarded to the per-type implementation.
+/// \return The null literal unchanged for a null input; NotSupported for
+/// above-max / below-min literals and for any other type; otherwise the
+/// truncated literal.
+Result<Literal> TruncateUtils::TruncateLiteral(const Literal& literal, int32_t width) {
+  if (literal.IsNull()) [[unlikely]] {
+    // Return null as is
+    return literal;
+  }
+
+  if (literal.IsAboveMax() || literal.IsBelowMin()) [[unlikely]] {
+    return NotSupported("Cannot truncate {}", literal.ToString());
+  }
+
+  switch (literal.type()->type_id()) {
+    case TypeId::kInt:
+      return TruncateLiteralImpl<TypeId::kInt>(literal, width);
+    case TypeId::kLong:
+      return TruncateLiteralImpl<TypeId::kLong>(literal, width);
+    case TypeId::kDecimal:
+      return TruncateLiteralImpl<TypeId::kDecimal>(literal, width);
+    case TypeId::kString:
+      return TruncateLiteralImpl<TypeId::kString>(literal, width);
+    case TypeId::kBinary:
+      return TruncateLiteralImpl<TypeId::kBinary>(literal, width);
+    default:
+      break;
+  }
+  return NotSupported("Truncate is not supported for type: {}",
+                      literal.type()->ToString());
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/truncate_util.h b/src/iceberg/util/truncate_util.h
index 5e76135..881c1d7 100644
--- a/src/iceberg/util/truncate_util.h
+++ b/src/iceberg/util/truncate_util.h
@@ -19,10 +19,13 @@
 
 #pragma once
 
+#include <cstdint>
 #include <string>
 #include <utility>
 
 #include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
 
 namespace iceberg {
 
@@ -64,9 +67,25 @@ class ICEBERG_EXPORT TruncateUtils {
   /// values, the correct truncate function is: v - (((v % W) + W) % W)
   template <typename T>
     requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
-  static inline T TruncateInteger(T v, size_t W) {
+  static inline T TruncateInteger(T v, int32_t W) {  // with a signed W, `v % W` is evaluated in signed arithmetic
     return v - (((v % W) + W) % W);
   }
+
+  /// \brief Truncate a Decimal to a specified width, computed as decimal - (((decimal % width) + width) % width).
+  /// \param decimal The input Decimal to truncate.
+  /// \param width The width to truncate to; it is used as a divisor, so presumably it must be non-zero (TODO confirm callers validate it).
+  /// \return A Decimal truncated to the specified width.
+  static Decimal TruncateDecimal(const Decimal& decimal, int32_t width);
+
+  /// \brief Truncate a Literal to a specified width; a null literal is returned unchanged.
+  /// \param literal The input Literal to truncate.
+  /// \param width The width to truncate to (code points for STRING, bytes for BINARY).
+  /// \return A Result containing the truncated Literal, or NotSupported for above-max/below-min literals and unsupported types.
+  /// Supported types are: INT, LONG, DECIMAL, STRING, BINARY.
+  /// Reference:
+  /// - [Truncate Transform
+  /// Details](https://iceberg.apache.org/spec/#truncate-transform-details)
+  static Result<Literal> TruncateLiteral(const Literal& literal, int32_t 
width);
 };
 
 }  // namespace iceberg

Reply via email to