This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 81bf29e feat: Literal adapt Uuid representation (#253)
81bf29e is described below
commit 81bf29e8d4af2bd7d29c0f4fbea83f62e5863153
Author: Junwang Zhao <[email protected]>
AuthorDate: Sun Oct 12 02:16:02 2025 +0800
feat: Literal adapt Uuid representation (#253)
---
src/iceberg/expression/literal.cc | 15 ++++++++++++++-
src/iceberg/expression/literal.h | 10 +++++++++-
src/iceberg/test/literal_test.cc | 18 ++++++++++++++++++
src/iceberg/transform_function.cc | 4 ++++
src/iceberg/util/conversions.cc | 16 ++++++++++++++--
5 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc
index adfe535..bd76d9c 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -147,6 +147,8 @@ Literal Literal::Double(double value) { return {Value{value}, float64()}; }
Literal Literal::String(std::string value) { return {Value{std::move(value)}, string()}; }
+Literal Literal::UUID(Uuid value) { return {Value{std::move(value)}, uuid()}; }
+
Literal Literal::Binary(std::vector<uint8_t> value) {
return {Value{std::move(value)}, binary()};
}
@@ -251,6 +253,15 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
return this_val <=> other_val;
}
+ case TypeId::kUuid: {
+ auto& this_val = std::get<Uuid>(value_);
+ auto& other_val = std::get<Uuid>(other.value_);
+ if (this_val == other_val) {
+ return std::partial_ordering::equivalent;
+ }
+ return std::partial_ordering::unordered;
+ }
+
case TypeId::kBinary: {
auto& this_val = std::get<std::vector<uint8_t>>(value_);
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
@@ -299,6 +310,9 @@ std::string Literal::ToString() const {
case TypeId::kString: {
return std::get<std::string>(value_);
}
+ case TypeId::kUuid: {
+ return std::get<Uuid>(value_).ToString();
+ }
case TypeId::kBinary: {
const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
std::string result;
@@ -318,7 +332,6 @@ std::string Literal::ToString() const {
return result;
}
case TypeId::kDecimal:
- case TypeId::kUuid:
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h
index c11d48f..70ff2d8 100644
--- a/src/iceberg/expression/literal.h
+++ b/src/iceberg/expression/literal.h
@@ -28,6 +28,7 @@
#include "iceberg/result.h"
#include "iceberg/type.h"
#include "iceberg/util/formattable.h"
+#include "iceberg/util/uuid.h"
namespace iceberg {
@@ -56,8 +57,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
float, // for float
double, // for double
std::string, // for string
+ Uuid, // for uuid
std::vector<uint8_t>, // for binary, fixed
- std::array<uint8_t, 16>, // for uuid and decimal
+ std::array<uint8_t, 16>, // for decimal
BelowMin, AboveMax>;
/// \brief Factory methods for primitive types
@@ -71,6 +73,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
static Literal Float(float value);
static Literal Double(double value);
static Literal String(std::string value);
+ static Literal UUID(Uuid value);
static Literal Binary(std::vector<uint8_t> value);
static Literal Fixed(std::vector<uint8_t> value);
@@ -207,6 +210,11 @@ struct LiteralTraits<TypeId::kString> {
using ValueType = std::string;
};
+template <>
+struct LiteralTraits<TypeId::kUuid> {
+ using ValueType = Uuid;
+};
+
template <>
struct LiteralTraits<TypeId::kBinary> {
using ValueType = std::vector<uint8_t>;
diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc
index bd7544b..58cc906 100644
--- a/src/iceberg/test/literal_test.cc
+++ b/src/iceberg/test/literal_test.cc
@@ -216,6 +216,15 @@ TEST(LiteralTest, StringBasics) {
EXPECT_EQ(empty_string.ToString(), "");
}
+// Uuid type tests
+TEST(LiteralTest, UuidBasics) {
+ auto uuid = Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value();
+ auto uuid_literal = Literal::UUID(uuid);
+
+ EXPECT_EQ(uuid_literal.type()->type_id(), TypeId::kUuid);
+ EXPECT_EQ(uuid_literal.ToString(), "123e4567-e89b-12d3-a456-426614174000");
+}
+
TEST(LiteralTest, StringComparison) {
auto string1 = Literal::String("apple");
auto string2 = Literal::String("banana");
@@ -480,6 +489,15 @@ INSTANTIATE_TEST_SUITE_P(
Literal::String("AAAAAAAAAAAAAAAA"),
string()},
+ // Uuid type
+ LiteralParam{
+ "Uuid",
+ {0x12, 0x3E, 0x45, 0x67, 0xE8, 0x9B, 0x12, 0xD3, 0xA4, 0x56, 0x42, 0x66, 0x14,
+ 0x17, 0x40, 0x00},
+ Literal::UUID(
+ Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
+ uuid()},
+
LiteralParam{"BinaryData",
{0x01, 0x02, 0x03, 0xFF},
Literal::Binary({0x01, 0x02, 0x03, 0xFF}),
diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc
index b043c39..fd9a165 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -29,6 +29,7 @@
#include "iceberg/type.h"
#include "iceberg/util/murmurhash3_internal.h"
#include "iceberg/util/truncate_util.h"
+#include "iceberg/util/uuid.h"
namespace iceberg {
@@ -75,6 +76,9 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
MurmurHash3_x86_32(value.data(), sizeof(uint8_t) * 16, 0, &hash_value);
} else if constexpr (std::is_same_v<T, std::string>) {
MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
+ } else if constexpr (std::is_same_v<T, Uuid>) {
+ MurmurHash3_x86_32(std::get<Uuid>(literal.value()).bytes().data(),
+ Uuid::kLength, 0, &hash_value);
} else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
} else if constexpr (std::is_same_v<T, std::monostate> ||
diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc
index c5dbcf3..e12e481 100644
--- a/src/iceberg/util/conversions.cc
+++ b/src/iceberg/util/conversions.cc
@@ -25,6 +25,7 @@
#include "iceberg/util/endian.h"
#include "iceberg/util/macros.h"
+#include "iceberg/util/uuid.h"
namespace iceberg {
@@ -69,6 +70,12 @@ Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kString>(const Literal::Value&
return std::vector<uint8_t>(str.begin(), str.end());
}
+template <>
+Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kUuid>(const Literal::Value& value) {
+ const auto& uuid = std::get<Uuid>(value);
+ return std::vector<uint8_t>(uuid.bytes().begin(), uuid.bytes().end());
+}
+
template <>
Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kBinary>(const Literal::Value& value) {
return std::get<std::vector<uint8_t>>(value);
@@ -98,9 +105,10 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)
DISPATCH_LITERAL_TO_BYTES(TypeId::kString)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kUuid)
DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary)
DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed)
- // TODO(Li Feiyang): Add support for UUID and Decimal
+ // TODO(Li Feiyang): Add support for Decimal
default:
return NotSupported("Serialization for type {} is not supported",
type.ToString());
@@ -172,6 +180,10 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
case TypeId::kString:
return Literal::Value{
std::string(reinterpret_cast<const char*>(data.data()), data.size())};
+ case TypeId::kUuid: {
+ ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromBytes(data));
+ return Literal::Value{uuid};
+ }
case TypeId::kBinary:
return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
case TypeId::kFixed: {
@@ -182,7 +194,7 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
}
return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
}
- // TODO(Li Feiyang): Add support for UUID and Decimal
+ // TODO(Li Feiyang): Add support for Decimal
default:
return NotSupported("Deserialization for type {} is not supported",
type.ToString());