This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 81bf29e  feat: Literal adapt Uuid representation (#253)
81bf29e is described below

commit 81bf29e8d4af2bd7d29c0f4fbea83f62e5863153
Author: Junwang Zhao <[email protected]>
AuthorDate: Sun Oct 12 02:16:02 2025 +0800

    feat: Literal adapt Uuid representation (#253)
---
 src/iceberg/expression/literal.cc | 15 ++++++++++++++-
 src/iceberg/expression/literal.h  | 10 +++++++++-
 src/iceberg/test/literal_test.cc  | 18 ++++++++++++++++++
 src/iceberg/transform_function.cc |  4 ++++
 src/iceberg/util/conversions.cc   | 16 ++++++++++++++--
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc
index adfe535..bd76d9c 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -147,6 +147,8 @@ Literal Literal::Double(double value) { return {Value{value}, float64()}; }
 
 Literal Literal::String(std::string value) { return {Value{std::move(value)}, string()}; }
 
+Literal Literal::UUID(Uuid value) { return {Value{std::move(value)}, uuid()}; }
+
 Literal Literal::Binary(std::vector<uint8_t> value) {
   return {Value{std::move(value)}, binary()};
 }
@@ -251,6 +253,15 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
       return this_val <=> other_val;
     }
 
+    case TypeId::kUuid: {
+      auto& this_val = std::get<Uuid>(value_);
+      auto& other_val = std::get<Uuid>(other.value_);
+      if (this_val == other_val) {
+        return std::partial_ordering::equivalent;
+      }
+      return std::partial_ordering::unordered;
+    }
+
     case TypeId::kBinary: {
       auto& this_val = std::get<std::vector<uint8_t>>(value_);
       auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
@@ -299,6 +310,9 @@ std::string Literal::ToString() const {
     case TypeId::kString: {
       return std::get<std::string>(value_);
     }
+    case TypeId::kUuid: {
+      return std::get<Uuid>(value_).ToString();
+    }
     case TypeId::kBinary: {
       const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
       std::string result;
@@ -318,7 +332,6 @@ std::string Literal::ToString() const {
       return result;
     }
     case TypeId::kDecimal:
-    case TypeId::kUuid:
     case TypeId::kDate:
     case TypeId::kTime:
     case TypeId::kTimestamp:
diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h
index c11d48f..70ff2d8 100644
--- a/src/iceberg/expression/literal.h
+++ b/src/iceberg/expression/literal.h
@@ -28,6 +28,7 @@
 #include "iceberg/result.h"
 #include "iceberg/type.h"
 #include "iceberg/util/formattable.h"
+#include "iceberg/util/uuid.h"
 
 namespace iceberg {
 
@@ -56,8 +57,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
                              float,           // for float
                              double,          // for double
                              std::string,     // for string
+                             Uuid,            // for uuid
                              std::vector<uint8_t>,     // for binary, fixed
-                             std::array<uint8_t, 16>,  // for uuid and decimal
+                             std::array<uint8_t, 16>,  // for decimal
                              BelowMin, AboveMax>;
 
   /// \brief Factory methods for primitive types
@@ -71,6 +73,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
   static Literal Float(float value);
   static Literal Double(double value);
   static Literal String(std::string value);
+  static Literal UUID(Uuid value);
   static Literal Binary(std::vector<uint8_t> value);
   static Literal Fixed(std::vector<uint8_t> value);
 
@@ -207,6 +210,11 @@ struct LiteralTraits<TypeId::kString> {
   using ValueType = std::string;
 };
 
+template <>
+struct LiteralTraits<TypeId::kUuid> {
+  using ValueType = Uuid;
+};
+
 template <>
 struct LiteralTraits<TypeId::kBinary> {
   using ValueType = std::vector<uint8_t>;
diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc
index bd7544b..58cc906 100644
--- a/src/iceberg/test/literal_test.cc
+++ b/src/iceberg/test/literal_test.cc
@@ -216,6 +216,15 @@ TEST(LiteralTest, StringBasics) {
   EXPECT_EQ(empty_string.ToString(), "");
 }
 
+// Uuid type tests
+TEST(LiteralTest, UuidBasics) {
+  auto uuid = Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value();
+  auto uuid_literal = Literal::UUID(uuid);
+
+  EXPECT_EQ(uuid_literal.type()->type_id(), TypeId::kUuid);
+  EXPECT_EQ(uuid_literal.ToString(), "123e4567-e89b-12d3-a456-426614174000");
+}
+
 TEST(LiteralTest, StringComparison) {
   auto string1 = Literal::String("apple");
   auto string2 = Literal::String("banana");
@@ -480,6 +489,15 @@ INSTANTIATE_TEST_SUITE_P(
                      Literal::String("AAAAAAAAAAAAAAAA"),
                      string()},
 
+        // Uuid type
+        LiteralParam{
+            "Uuid",
+            {0x12, 0x3E, 0x45, 0x67, 0xE8, 0x9B, 0x12, 0xD3, 0xA4, 0x56, 0x42, 0x66, 0x14,
+             0x17, 0x40, 0x00},
+            Literal::UUID(
+                Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
+            uuid()},
+
         LiteralParam{"BinaryData",
                      {0x01, 0x02, 0x03, 0xFF},
                      Literal::Binary({0x01, 0x02, 0x03, 0xFF}),
diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc
index b043c39..fd9a165 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -29,6 +29,7 @@
 #include "iceberg/type.h"
 #include "iceberg/util/murmurhash3_internal.h"
 #include "iceberg/util/truncate_util.h"
+#include "iceberg/util/uuid.h"
 
 namespace iceberg {
 
@@ -75,6 +76,9 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
           MurmurHash3_x86_32(value.data(), sizeof(uint8_t) * 16, 0, &hash_value);
         } else if constexpr (std::is_same_v<T, std::string>) {
           MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
+        } else if constexpr (std::is_same_v<T, Uuid>) {
+          MurmurHash3_x86_32(std::get<Uuid>(literal.value()).bytes().data(),
+                             Uuid::kLength, 0, &hash_value);
         } else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
           MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
         } else if constexpr (std::is_same_v<T, std::monostate> ||
diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc
index c5dbcf3..e12e481 100644
--- a/src/iceberg/util/conversions.cc
+++ b/src/iceberg/util/conversions.cc
@@ -25,6 +25,7 @@
 
 #include "iceberg/util/endian.h"
 #include "iceberg/util/macros.h"
+#include "iceberg/util/uuid.h"
 
 namespace iceberg {
 
@@ -69,6 +70,12 @@ Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kString>(const Literal::Value&
   return std::vector<uint8_t>(str.begin(), str.end());
 }
 
+template <>
+Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kUuid>(const Literal::Value& value) {
+  const auto& uuid = std::get<Uuid>(value);
+  return std::vector<uint8_t>(uuid.bytes().begin(), uuid.bytes().end());
+}
+
 template <>
 Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kBinary>(const Literal::Value& value) {
   return std::get<std::vector<uint8_t>>(value);
@@ -98,9 +105,10 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
     DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kString)
+    DISPATCH_LITERAL_TO_BYTES(TypeId::kUuid)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary)
     DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed)
-      // TODO(Li Feiyang): Add support for UUID and Decimal
+      // TODO(Li Feiyang): Add support for Decimal
 
     default:
       return NotSupported("Serialization for type {} is not supported", type.ToString());
@@ -172,6 +180,10 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
     case TypeId::kString:
       return Literal::Value{
           std::string(reinterpret_cast<const char*>(data.data()), data.size())};
+    case TypeId::kUuid: {
+      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromBytes(data));
+      return Literal::Value{uuid};
+    }
     case TypeId::kBinary:
       return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
     case TypeId::kFixed: {
@@ -182,7 +194,7 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
       }
       return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
     }
-      // TODO(Li Feiyang): Add support for UUID and Decimal
+    // TODO(Li Feiyang): Add support for Decimal
     default:
       return NotSupported("Deserialization for type {} is not supported",
                           type.ToString());

Reply via email to