This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 84814bc1 feat: impl Transform::ToHumanString (#505)
84814bc1 is described below
commit 84814bc1003daf3f14138b80b8d2d9e07dd9e6a1
Author: wzhuo <[email protected]>
AuthorDate: Wed Jan 14 14:19:37 2026 +0800
feat: impl Transform::ToHumanString (#505)
---
src/iceberg/partition_spec.cc | 5 +-
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/location_provider_test.cc | 2 +-
src/iceberg/test/meson.build | 1 +
src/iceberg/test/partition_spec_test.cc | 5 +-
src/iceberg/test/transform_human_string_test.cc | 215 ++++++++++++++++++++++++
src/iceberg/transform.cc | 74 ++++++++
src/iceberg/transform.h | 6 +
8 files changed, 302 insertions(+), 7 deletions(-)
diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc
index 9c38d0c5..c00eab7d 100644
--- a/src/iceberg/partition_spec.cc
+++ b/src/iceberg/partition_spec.cc
@@ -111,9 +111,8 @@ Result<std::string> PartitionSpec::PartitionPath(const
PartitionValues& data) co
if (i > 0) {
ss << "/";
}
- // TODO(zhuo.wang): transform for partition value, will be fixed after
transform util
- // is ready
- std::string partition_value = value.get().ToString();
+ ICEBERG_ASSIGN_OR_RAISE(auto partition_value,
+ fields_[i].transform()->ToHumanString(value));
ss << UrlEncoder::Encode(fields_[i].name()) << "="
<< UrlEncoder::Encode(partition_value);
}
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 6124b6bc..4f4516c7 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -68,6 +68,7 @@ add_iceberg_test(schema_test
schema_util_test.cc
sort_field_test.cc
sort_order_test.cc
+ transform_human_string_test.cc
transform_test.cc
type_test.cc)
diff --git a/src/iceberg/test/location_provider_test.cc
b/src/iceberg/test/location_provider_test.cc
index b287ded7..c78eb588 100644
--- a/src/iceberg/test/location_provider_test.cc
+++ b/src/iceberg/test/location_provider_test.cc
@@ -112,7 +112,7 @@ TEST_F(LocationProviderTest, ObjectStorageWithPartition) {
std::vector<std::string> parts = SplitString(location, '/');
ASSERT_GT(parts.size(), 2);
- EXPECT_EQ("data%231=%22val%231%22", parts[parts.size() - 2]);
+ EXPECT_EQ("data%231=val%231", parts[parts.size() - 2]);
}
TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 95c68962..791340be 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -40,6 +40,7 @@ iceberg_tests = {
'schema_util_test.cc',
'sort_field_test.cc',
'sort_order_test.cc',
+ 'transform_human_string_test.cc',
'transform_test.cc',
'type_test.cc',
),
diff --git a/src/iceberg/test/partition_spec_test.cc
b/src/iceberg/test/partition_spec_test.cc
index ea3ea6e1..6f1b4995 100644
--- a/src/iceberg/test/partition_spec_test.cc
+++ b/src/iceberg/test/partition_spec_test.cc
@@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) {
PartitionValues part_data(
{Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
- std::string expected =
- "id_partition=123/name_partition=%22val2%22/ts_partition=19489";
+ std::string expected =
"id_partition=123/name_partition=val2/ts_partition=2023-05-12";
EXPECT_EQ(expected, path);
}
@@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) {
{Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
std::string expected =
- "id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
+ "id_partition=123/name_partition=val%232/ts_partition=2023-05-12";
EXPECT_EQ(expected, path);
}
}
diff --git a/src/iceberg/test/transform_human_string_test.cc
b/src/iceberg/test/transform_human_string_test.cc
new file mode 100644
index 00000000..28f4a484
--- /dev/null
+++ b/src/iceberg/test/transform_human_string_test.cc
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cstddef>
+#include <format>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+
+namespace iceberg {
+
+struct HumanStringTestParam {  // One parameterized case shared by the suites in this file.
+ std::string test_name;  // Name gtest reports for this case (see the name generators).
+ std::shared_ptr<Type> source_type;  // Type handed to Bind(); read only by the timestamp suite.
+ Literal literal;  // Input literal for the case.
+ std::vector<std::string> expecteds;  // Expected strings, parallel to the fixture's transforms_.
+};
+
+// Fixture for the identity transform. Its test hands each case's literal straight to
+// Transform::ToHumanString; expecteds[i] of a case pairs with transforms_[i].
+class IdentityHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:
+  std::vector<std::shared_ptr<Transform>> transforms_ = {Transform::Identity()};
+};
+
+// Checks that every transform held by the fixture renders the case's literal as the
+// expected human-readable string.
+TEST_P(IdentityHumanStringTest, ToHumanString) {
+  const auto& param = GetParam();
+  // The loop indexes `transforms_` and `expecteds` in parallel; fail fast instead of
+  // reading past the end when a case lists the wrong number of expectations.
+  ASSERT_EQ(transforms_.size(), param.expecteds.size());
+  // Unsigned index: avoids the signed/unsigned comparison against size().
+  for (size_t i = 0; i < transforms_.size(); ++i) {
+    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+// Identity-transform cases. The fixture holds a single transform, so every case lists
+// exactly one expected string. Binary and fixed payloads are expected as Base64 text,
+// and strings are expected back unchanged.
+INSTANTIATE_TEST_SUITE_P(
+    IdentityHumanStringTestCases, IdentityHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared<IntType>()),
+                             .expecteds = {"null"}},
+        HumanStringTestParam{.test_name = "Binary",
+                             .literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
+                             .expecteds = {"AQID"}},
+        HumanStringTestParam{.test_name = "Fixed",
+                             .literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
+                             .expecteds = {"AQID"}},
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds = {"2017-12-01"}},
+        HumanStringTestParam{.test_name = "Time",
+                             .literal = Literal::Time(36775038194),
+                             .expecteds = {"10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "TimestampWithZone",
+                             .literal = Literal::TimestampTz(1512151975038194),
+                             .expecteds = {"2017-12-01T18:12:55.038194+00:00"}},
+        HumanStringTestParam{.test_name = "TimestampWithoutZone",
+                             .literal = Literal::Timestamp(1512123175038194),
+                             .expecteds = {"2017-12-01T10:12:55.038194"}},
+        HumanStringTestParam{.test_name = "Long",
+                             .literal = Literal::Long(-1234567890000L),
+                             .expecteds = {"-1234567890000"}},
+        HumanStringTestParam{.test_name = "String",
+                             .literal = Literal::String("a/b/c=d"),
+                             .expecteds = {"a/b/c=d"}}),
+    // Use the case's own name as the gtest parameter name.
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& param_info) {
+      return param_info.param.test_name;
+    });
+
+// Fixture for the transforms exercised against date literals. A case's expecteds are
+// listed in the same order as transforms_: year, month, day.
+class DateHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:
+  std::vector<std::shared_ptr<Transform>> transforms_ = {
+      Transform::Year(), Transform::Month(), Transform::Day()};
+};
+
+// For each date transform: bind it to DateType, apply it to the case's literal, and
+// check how the transformed value is rendered by Transform::ToHumanString.
+TEST_P(DateHumanStringTest, ToHumanString) {
+  const auto& test_case = GetParam();
+
+  for (size_t idx = 0; idx < transforms_.size(); ++idx) {
+    const auto& transform = transforms_[idx];
+    ICEBERG_UNWRAP_OR_FAIL(auto bound_fn, transform->Bind(std::make_shared<DateType>()));
+    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_fn->Transform(test_case.literal));
+    EXPECT_THAT(transform->ToHumanString(transformed),
+                HasValue(::testing::Eq(test_case.expecteds[idx])));
+  }
+}
+
+// Date cases. Each case lists the expected year, month and day strings, in the order of
+// DateHumanStringTest::transforms_. The negative inputs cover dates that the
+// expectations place in 1969.
+INSTANTIATE_TEST_SUITE_P(
+    DateHumanStringTestCases, DateHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds = {"2017", "2017-12", "2017-12-01"}},
+        HumanStringTestParam{.test_name = "NegativeDate",
+                             .literal = Literal::Date(-2),
+                             .expecteds = {"1969", "1969-12", "1969-12-30"}},
+        HumanStringTestParam{.test_name = "DateLowerBound",
+                             .literal = Literal::Date(0),
+                             .expecteds = {"1970", "1970-01", "1970-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateLowerBound",
+                             .literal = Literal::Date(-365),
+                             .expecteds = {"1969", "1969-01", "1969-01-01"}},
+        HumanStringTestParam{.test_name = "NegativeDateUpperBound",
+                             .literal = Literal::Date(-1),
+                             .expecteds = {"1969", "1969-12", "1969-12-31"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared<DateType>()),
+                             .expecteds = {"null", "null", "null"}}),
+    // Use the case's own name as the gtest parameter name.
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& param_info) {
+      return param_info.param.test_name;
+    });
+
+// Fixture for the transforms exercised against timestamp literals. A case's expecteds
+// are listed in the same order as transforms_: year, month, day, hour.
+class TimestampHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
+ protected:
+  std::vector<std::shared_ptr<Transform>> transforms_ = {
+      Transform::Year(), Transform::Month(), Transform::Day(), Transform::Hour()};
+};
+
+// Inputs that ToHumanString must reject for the year/month/day/hour transforms: the
+// above-max and below-min literals, and a literal whose payload is not int32.
+// The spelling "transfrom" in the expected messages mirrors the text emitted by
+// Transform::ToHumanString, so the two must be changed together.
+TEST_F(TimestampHumanStringTest, InvalidType) {
+  // A long literal; the expectations below require it to be rejected by every transform.
+  auto wrong_type_literal = Literal::Long(std::numeric_limits<int64_t>::max());
+
+  // Casting the int64 extremes to int is expected to produce the literals that the
+  // error messages below call "aboveMax" and "belowMin".
+  ICEBERG_UNWRAP_OR_FAIL(auto above_max,
+                         Literal::Long(std::numeric_limits<int64_t>::max())
+                             .CastTo(std::make_shared<IntType>()));
+  ICEBERG_UNWRAP_OR_FAIL(auto below_min,
+                         Literal::Long(std::numeric_limits<int64_t>::min())
+                             .CastTo(std::make_shared<IntType>()));
+
+  for (const auto& transform : transforms_) {
+    const auto above_max_result = transform->ToHumanString(above_max);
+    EXPECT_THAT(above_max_result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(above_max_result,
+                HasErrorMessage("Cannot transfrom human string for value: aboveMax"));
+
+    const auto below_min_result = transform->ToHumanString(below_min);
+    EXPECT_THAT(below_min_result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(below_min_result,
+                HasErrorMessage("Cannot transfrom human string for value: belowMin"));
+
+    // The message names both the transform and the offending literal type.
+    const auto wrong_type_message =
+        std::format("Transfrom human {} from type {} is not supported",
+                    TransformTypeToString(transform->transform_type()),
+                    wrong_type_literal.type()->ToString());
+    const auto wrong_type_result = transform->ToHumanString(wrong_type_literal);
+    EXPECT_THAT(wrong_type_result, IsError(ErrorKind::kNotSupported));
+    EXPECT_THAT(wrong_type_result, HasErrorMessage(wrong_type_message));
+  }
+}
+
+// For each temporal transform: bind it to the case's source type, apply it to the
+// case's literal, and check how the transformed value is rendered.
+TEST_P(TimestampHumanStringTest, ToHumanString) {
+  const auto& test_case = GetParam();
+
+  for (size_t idx = 0; idx < transforms_.size(); ++idx) {
+    const auto& transform = transforms_[idx];
+    ICEBERG_UNWRAP_OR_FAIL(auto bound_fn, transform->Bind(test_case.source_type));
+    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_fn->Transform(test_case.literal));
+    EXPECT_THAT(transform->ToHumanString(transformed),
+                HasValue(::testing::Eq(test_case.expecteds[idx])));
+  }
+}
+
+// Timestamp cases. Each case lists the expected year, month, day and hour strings, in
+// the order of TimestampHumanStringTest::transforms_. `source_type` is the type the
+// transforms are bound to before being applied to the literal.
+INSTANTIATE_TEST_SUITE_P(
+    TimestampHumanStringTestCases, TimestampHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{
+            .test_name = "Timestamp",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(1512123175038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestamp",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-136024961806),
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
+        HumanStringTestParam{
+            .test_name = "TimestampLowerBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(0),
+            .expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampLowerBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-172800000000),
+            .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"}},
+        HumanStringTestParam{
+            .test_name = "NegativeTimestampUpperBound",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Timestamp(-1),
+            .expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
+        HumanStringTestParam{
+            .test_name = "TimestampTz",
+            .source_type = std::make_shared<TimestampTzType>(),
+            .literal = Literal::TimestampTz(1512151975038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
+        HumanStringTestParam{
+            .test_name = "Null",
+            .source_type = std::make_shared<TimestampType>(),
+            .literal = Literal::Null(std::make_shared<TimestampType>()),
+            .expecteds = {"null", "null", "null", "null"}}),
+    // Use the case's own name as the gtest parameter name.
+    [](const ::testing::TestParamInfo<HumanStringTestParam>& param_info) {
+      return param_info.param.test_name;
+    });
+
+} // namespace iceberg
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 560cc392..004111c3 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -31,6 +31,7 @@
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/projection_util_internal.h"
+#include "iceberg/util/transform_util.h"
namespace iceberg {
namespace {
@@ -366,6 +367,79 @@ Result<std::unique_ptr<UnboundPredicate>>
Transform::ProjectStrict(
std::unreachable();
}
+/// Renders an already-transformed value as human-readable text.
+///
+/// Null literals render as "null"; above-max and below-min literals are rejected with
+/// NotSupported. The year, month, day and hour transforms require an int32 payload and
+/// delegate to the matching TransformUtil::Human* helper. Every other transform formats
+/// the value according to the literal's own type.
+Result<std::string> Transform::ToHumanString(const Literal& value) {
+  if (value.IsNull()) {
+    return "null";
+  }
+
+  const bool out_of_range = value.IsAboveMax() || value.IsBelowMin();
+  if (out_of_range) [[unlikely]] {
+    return NotSupported("Cannot transfrom human string for value: {}", value.ToString());
+  }
+
+  const auto& payload = value.value();
+
+  // Formatting shared by the transforms that present the value by its own type.
+  const auto format_by_value_type = [&]() -> Result<std::string> {
+    switch (value.type()->type_id()) {
+      case TypeId::kDate:
+        return TransformUtil::HumanDay(std::get<int32_t>(payload));
+      case TypeId::kTime:
+        return TransformUtil::HumanTime(std::get<int64_t>(payload));
+      case TypeId::kTimestamp:
+        return TransformUtil::HumanTimestamp(std::get<int64_t>(payload));
+      case TypeId::kTimestampTz:
+        return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(payload));
+      case TypeId::kFixed:
+      case TypeId::kBinary: {
+        // Raw bytes are rendered as Base64 text.
+        const auto& bytes = std::get<std::vector<uint8_t>>(payload);
+        return TransformUtil::Base64Encode(
+            {reinterpret_cast<const char*>(bytes.data()), bytes.size()});
+      }
+      case TypeId::kDecimal: {
+        // The scale lives on the type, not on the stored decimal value.
+        const auto& decimal_type = internal::checked_cast<DecimalType&>(*value.type());
+        const auto& decimal = std::get<::iceberg::Decimal>(payload);
+        return decimal.ToString(decimal_type.scale());
+      }
+      case TypeId::kString:
+        // Returned as-is, without the quoting that Literal::ToString() would add.
+        return std::get<std::string>(payload);
+      default:
+        return value.ToString();
+    }
+  };
+
+  // The temporal transforms only accept an int32 payload.
+  const bool has_int32 = std::holds_alternative<int32_t>(payload);
+
+  switch (transform_type_) {
+    case TransformType::kYear:
+      if (!has_int32) [[unlikely]] {
+        return NotSupported("Transfrom human year from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanYear(std::get<int32_t>(payload));
+    case TransformType::kMonth:
+      if (!has_int32) [[unlikely]] {
+        return NotSupported("Transfrom human month from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanMonth(std::get<int32_t>(payload));
+    case TransformType::kDay:
+      if (!has_int32) [[unlikely]] {
+        return NotSupported("Transfrom human day from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanDay(std::get<int32_t>(payload));
+    case TransformType::kHour:
+      if (!has_int32) [[unlikely]] {
+        return NotSupported("Transfrom human hour from type {} is not supported",
+                            value.type()->ToString());
+      }
+      return TransformUtil::HumanHour(std::get<int32_t>(payload));
+    // NOTE(review): the void transform is formatted by value type like the others in
+    // this group; confirm whether it should instead always render "null".
+    case TransformType::kIdentity:
+    case TransformType::kBucket:
+    case TransformType::kTruncate:
+    case TransformType::kUnknown:
+    case TransformType::kVoid:
+      return format_by_value_type();
+  }
+  std::unreachable();
+}
+
bool TransformFunction::Equals(const TransformFunction& other) const {
return transform_type_ == other.transform_type_ && *source_type_ ==
*other.source_type_;
}
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index 36da46d9..873b3ca6 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
+ /// \brief Returns a human-readable string representation of a transformed value.
+ ///
+ /// \param value The already-transformed literal (the output of this transform) to render.
+ /// \return The human-readable string, or an error if the value cannot be rendered.
+ Result<std::string> ToHumanString(const Literal& value);
+
/// \brief Returns a string representation of this transform (e.g.,
"bucket[16]").
std::string ToString() const override;