This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 84814bc1 feat: impl Transform::ToHumanString (#505)
84814bc1 is described below

commit 84814bc1003daf3f14138b80b8d2d9e07dd9e6a1
Author: wzhuo <[email protected]>
AuthorDate: Wed Jan 14 14:19:37 2026 +0800

    feat: impl Transform::ToHumanString (#505)
---
 src/iceberg/partition_spec.cc                   |   5 +-
 src/iceberg/test/CMakeLists.txt                 |   1 +
 src/iceberg/test/location_provider_test.cc      |   2 +-
 src/iceberg/test/meson.build                    |   1 +
 src/iceberg/test/partition_spec_test.cc         |   5 +-
 src/iceberg/test/transform_human_string_test.cc | 215 ++++++++++++++++++++++++
 src/iceberg/transform.cc                        |  74 ++++++++
 src/iceberg/transform.h                         |   6 +
 8 files changed, 302 insertions(+), 7 deletions(-)

diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc
index 9c38d0c5..c00eab7d 100644
--- a/src/iceberg/partition_spec.cc
+++ b/src/iceberg/partition_spec.cc
@@ -111,9 +111,8 @@ Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) co
     if (i > 0) {
       ss << "/";
     }
-    // TODO(zhuo.wang): transform for partition value, will be fixed after transform util
-    // is ready
-    std::string partition_value = value.get().ToString();
+    ICEBERG_ASSIGN_OR_RAISE(auto partition_value,
+                            fields_[i].transform()->ToHumanString(value));
     ss << UrlEncoder::Encode(fields_[i].name()) << "="
        << UrlEncoder::Encode(partition_value);
   }
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 6124b6bc..4f4516c7 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -68,6 +68,7 @@ add_iceberg_test(schema_test
                  schema_util_test.cc
                  sort_field_test.cc
                  sort_order_test.cc
+                 transform_human_string_test.cc
                  transform_test.cc
                  type_test.cc)
 
diff --git a/src/iceberg/test/location_provider_test.cc b/src/iceberg/test/location_provider_test.cc
index b287ded7..c78eb588 100644
--- a/src/iceberg/test/location_provider_test.cc
+++ b/src/iceberg/test/location_provider_test.cc
@@ -112,7 +112,7 @@ TEST_F(LocationProviderTest, ObjectStorageWithPartition) {
 
   std::vector<std::string> parts = SplitString(location, '/');
   ASSERT_GT(parts.size(), 2);
-  EXPECT_EQ("data%231=%22val%231%22", parts[parts.size() - 2]);
+  EXPECT_EQ("data%231=val%231", parts[parts.size() - 2]);
 }
 
 TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 95c68962..791340be 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -40,6 +40,7 @@ iceberg_tests = {
             'schema_util_test.cc',
             'sort_field_test.cc',
             'sort_order_test.cc',
+            'transform_human_string_test.cc',
             'transform_test.cc',
             'type_test.cc',
         ),
diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc
index ea3ea6e1..6f1b4995 100644
--- a/src/iceberg/test/partition_spec_test.cc
+++ b/src/iceberg/test/partition_spec_test.cc
@@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) {
     PartitionValues part_data(
         {Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
     ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
-    std::string expected =
-        "id_partition=123/name_partition=%22val2%22/ts_partition=19489";
+    std::string expected = "id_partition=123/name_partition=val2/ts_partition=2023-05-12";
     EXPECT_EQ(expected, path);
   }
 
@@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) {
         {Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
     ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
     std::string expected =
-        "id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
+        "id_partition=123/name_partition=val%232/ts_partition=2023-05-12";
     EXPECT_EQ(expected, path);
   }
 }
diff --git a/src/iceberg/test/transform_human_string_test.cc b/src/iceberg/test/transform_human_string_test.cc
new file mode 100644
index 00000000..28f4a484
--- /dev/null
+++ b/src/iceberg/test/transform_human_string_test.cc
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <memory>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+
+namespace iceberg {
+
+struct HumanStringTestParam {  // One parameterized ToHumanString case.
+  std::string test_name;  // Returned by the name generator as the case's gtest name.
+  std::shared_ptr<Type> source_type;  // Passed to Transform::Bind() by the timestamp suite.
+  Literal literal;  // Input literal for the case.
+  std::vector<std::string> expecteds;  // expecteds[i] is compared against transforms_[i].
+};
+
+class IdentityHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:  // Transforms under test; expecteds[i] pairs with transforms_[i].
+  std::vector<std::shared_ptr<Transform>> transforms_{{Transform::Identity()}};
+};
+
+TEST_P(IdentityHumanStringTest, ToHumanString) {  // Formats the raw literal, no Bind().
+  const auto& param = GetParam();
+  for (uint32_t i = 0; i < transforms_.size(); ++i) {  // unsigned: no sign-compare vs size()
+    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(  // Identity transform: one case per literal kind.
+    IdentityHumanStringTestCases, IdentityHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared<IntType>()),
+                             .expecteds{"null"}},
+        HumanStringTestParam{.test_name = "Binary",
+                             .literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
+                             .expecteds{"AQID"}},  // Base64 of bytes 0x01 0x02 0x03
+        HumanStringTestParam{.test_name = "Fixed",
+                             .literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
+                             .expecteds{"AQID"}},  // same Base64 path as Binary
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds{"2017-12-01"}},
+        HumanStringTestParam{.test_name = "Time",
+                             .literal = Literal::Time(36775038194),
+                             .expecteds{"10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "TimestampWithZone",
+                             .literal = Literal::TimestampTz(1512151975038194),
+                             .expecteds{"2017-12-01T18:12:55.038194+00:00"}},
+        HumanStringTestParam{.test_name = "TimestampWithoutZone",
+                             .literal = Literal::Timestamp(1512123175038194),
+                             .expecteds{"2017-12-01T10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "Long",
+                             .literal = Literal::Long(-1234567890000L),
+                             .expecteds{"-1234567890000"}},
+        HumanStringTestParam{.test_name = "String",
+                             .literal = Literal::String("a/b/c=d"),
+                             .expecteds{"a/b/c=d"}}),  // expected output equals the input text
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
+      return info.param.test_name;
+    });
+
+class DateHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:  // Order here fixes the order of each case's expecteds.
+  std::vector<std::shared_ptr<Transform>> transforms_{
+      Transform::Year(), Transform::Month(), Transform::Day()};
+};
+
+TEST_P(DateHumanStringTest, ToHumanString) {  // Bind to date, transform, then format.
+  const auto& param = GetParam();
+
+  for (uint32_t i = 0; i < transforms_.size(); i++) {
+    ICEBERG_UNWRAP_OR_FAIL(auto trans_func,
+                           transforms_[i]->Bind(std::make_shared<DateType>()));
+    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
+    EXPECT_THAT(transforms_[i]->ToHumanString(literal),  // formats the transformed literal
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(  // expecteds are ordered {year, month, day}.
+    DateHumanStringTestCases, DateHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds = {"2017", "2017-12", "2017-12-01"}},
+        HumanStringTestParam{.test_name = "NegativeDate",
+                             .literal = Literal::Date(-2),
+                             .expecteds = {"1969", "1969-12", "1969-12-30"}},
+        HumanStringTestParam{.test_name = "DateLowerBound",
+                             .literal = Literal::Date(0),  // day 0 is 1970-01-01
+                             .expecteds = {"1970", "1970-01", "1970-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateLowerBound",
+                             .literal = Literal::Date(-365),
+                             .expecteds = {"1969", "1969-01", "1969-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateUpperBound",
+                             .literal = Literal::Date(-1),
+                             .expecteds = {"1969", "1969-12", "1969-12-31"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared<DateType>()),
+                             .expecteds = {"null", "null", "null"}}),
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
+      return info.param.test_name;
+    });
+
+class TimestampHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:  // Order here fixes the order of each case's expecteds.
+  std::vector<std::shared_ptr<Transform>> transforms_{
+      Transform::Year(), Transform::Month(), Transform::Day(), Transform::Hour()};
+};
+
+TEST_F(TimestampHumanStringTest, InvalidType) {  // NOTE(review): no direct <limits>/<format> include in this file
+  ICEBERG_UNWRAP_OR_FAIL(auto above_max,
+                         Literal::Long(std::numeric_limits<int64_t>::max())
+                             .CastTo(std::make_shared<IntType>()));
+  ICEBERG_UNWRAP_OR_FAIL(auto below_min,
+                         Literal::Long(std::numeric_limits<int64_t>::min())
+                             .CastTo(std::make_shared<IntType>()));
+
+  auto unmatch_type_literal = Literal::Long(std::numeric_limits<int64_t>::max());
+
+  for (const auto& transform : transforms_) {  // same three failures for every transform
+    auto result = transform->ToHumanString(above_max);
+    EXPECT_THAT(result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(result,
+                HasErrorMessage("Cannot transfrom human string for value: aboveMax"));
+
+    result = transform->ToHumanString(below_min);
+    EXPECT_THAT(result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(result,
+                HasErrorMessage("Cannot transfrom human string for value: belowMin"));
+
+    result = transform->ToHumanString(unmatch_type_literal);  // long where int32 is required
+    EXPECT_THAT(result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(result, HasErrorMessage(std::format(
+                            "Transfrom human {} from type {} is not supported",
+                            TransformTypeToString(transform->transform_type()),
+                            unmatch_type_literal.type()->ToString())));
+  }
+}
+
+TEST_P(TimestampHumanStringTest, ToHumanString) {  // Bind to source_type, transform, format.
+  const auto& param = GetParam();
+  for (uint32_t i = 0; i < transforms_.size(); i++) {
+    ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transforms_[i]->Bind(param.source_type));
+    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
+    EXPECT_THAT(transforms_[i]->ToHumanString(literal),  // formats the transformed literal
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(  // expecteds are ordered {year, month, day, hour}.
+    TimestampHumanStringTestCases, TimestampHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{
+            .test_name = "Timestamp",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(1512123175038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestamp",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-136024961806),
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
+        HumanStringTestParam{
+            .test_name = "TimestampLowerBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(0),
+            .expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampLowerBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-172800000000),  // two days before the epoch, in microseconds
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
+        },
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampUpperBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-1),
+            .expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
+        HumanStringTestParam{
+            .test_name = "TimestampTz",
+            .source_type = std::make_shared<TimestampTzType>(),
+            .literal = Literal::TimestampTz(1512151975038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .source_type = std::make_shared<TimestampType>(),
+                             .literal = Literal::Null(std::make_shared<TimestampType>()),
+                             .expecteds = {"null", "null", "null", "null"}}),
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
+      return info.param.test_name;
+    });
+
+}  // namespace iceberg
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 560cc392..004111c3 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -31,6 +31,7 @@
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/macros.h"
 #include "iceberg/util/projection_util_internal.h"
+#include "iceberg/util/transform_util.h"
 
 namespace iceberg {
 namespace {
@@ -366,6 +367,79 @@ Result<std::unique_ptr<UnboundPredicate>> Transform::ProjectStrict(
   std::unreachable();
 }
 
+Result<std::string> Transform::ToHumanString(const Literal& value) {
+  if (value.IsNull()) {  // null is handled before the transform type is consulted
+    return "null";
+  }
+
+  if (value.IsAboveMax() || value.IsBelowMin()) [[unlikely]] {  // NOTE(review): "transfrom" is asserted verbatim by tests
+    return NotSupported("Cannot transfrom human string for value: {}", value.ToString());
+  }
+
+  switch (transform_type_) {
+    case TransformType::kYear: {  // year/month/day/hour all require an int32_t payload
+      if (!std::holds_alternative<int32_t>(value.value())) [[unlikely]] {
+        return NotSupported("Transfrom human year from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanYear(std::get<int32_t>(value.value()));
+    }
+    case TransformType::kMonth: {
+      if (!std::holds_alternative<int32_t>(value.value())) [[unlikely]] {
+        return NotSupported("Transfrom human month from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanMonth(std::get<int32_t>(value.value()));
+    }
+    case TransformType::kDay: {
+      if (!std::holds_alternative<int32_t>(value.value())) [[unlikely]] {
+        return NotSupported("Transfrom human day from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
+    }
+    case TransformType::kHour: {
+      if (!std::holds_alternative<int32_t>(value.value())) [[unlikely]] {
+        return NotSupported("Transfrom human hour from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanHour(std::get<int32_t>(value.value()));
+    }
+    case TransformType::kIdentity:
+    case TransformType::kBucket:
+    case TransformType::kTruncate:
+    case TransformType::kUnknown:
+    case TransformType::kVoid: {  // remaining transforms format by the literal's own type
+      switch (value.type()->type_id()) {
+        case TypeId::kDate:
+          return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
+        case TypeId::kTime:
+          return TransformUtil::HumanTime(std::get<int64_t>(value.value()));
+        case TypeId::kTimestamp:
+          return TransformUtil::HumanTimestamp(std::get<int64_t>(value.value()));
+        case TypeId::kTimestampTz:
+          return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value.value()));
+        case TypeId::kFixed:
+        case TypeId::kBinary: {  // raw bytes are rendered as Base64 text
+          const auto& binary_data = std::get<std::vector<uint8_t>>(value.value());
+          return TransformUtil::Base64Encode(
+              {reinterpret_cast<const char*>(binary_data.data()), binary_data.size()});
+        }
+        case TypeId::kDecimal: {  // the scale comes from the literal's DecimalType
+          const auto& decimal_type = internal::checked_cast<DecimalType&>(*value.type());
+          const auto& decimal = std::get<::iceberg::Decimal>(value.value());
+          return decimal.ToString(decimal_type.scale());
+        }
+        case TypeId::kString:  // the stored string itself, not value.ToString()
+          return std::get<std::string>(value.value());
+        default:
+          return value.ToString();
+      }
+    }
+  }
+  std::unreachable();
+}
+
 bool TransformFunction::Equals(const TransformFunction& other) const {
   return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
 }
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index 36da46d9..873b3ca6 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
   Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
       std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
 
+  /// \brief Returns a human-readable string representation of a transformed value.
+  ///
+  /// \param value The literal to format; year/month/day/hour require an int32 value.
+  /// \return The formatted string ("null" for a null literal), or a NotSupported error.
+  Result<std::string> ToHumanString(const Literal& value);
+
   /// \brief Returns a string representation of this transform (e.g., "bucket[16]").
   std::string ToString() const override;
 

Reply via email to