This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 4ebe732 fix: result type of DayTransform should be date (#285)
4ebe732 is described below
commit 4ebe732cd3333deb90ac5a60739540b7d8f9eb6f
Author: Junwang Zhao <[email protected]>
AuthorDate: Tue Nov 4 10:09:43 2025 +0800
fix: result type of DayTransform should be date (#285)
Result type of Day transform should be `int` per the spec, but in the
Java impl, it is `DateType`, this discrepancy has been discussed on
devlist, see [1]. iceberg-iceberg and iceberg-rust all conform to the
java impl, so make iceberg-cpp do the same.
The added comments are borrowed from iceberg-python.
[1] https://lists.apache.org/thread/2gq7b54nvc9q6f1j08l9lnzgm5onkmx5
---
src/iceberg/test/partition_spec_test.cc | 52 +++++++++++++++++++++++++++++++++
src/iceberg/test/transform_test.cc | 2 +-
src/iceberg/transform.h | 5 ++++
src/iceberg/transform_function.cc | 2 +-
src/iceberg/transform_function.h | 6 +++-
5 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/src/iceberg/test/partition_spec_test.cc
b/src/iceberg/test/partition_spec_test.cc
index 538d89a..d50b537 100644
--- a/src/iceberg/test/partition_spec_test.cc
+++ b/src/iceberg/test/partition_spec_test.cc
@@ -24,7 +24,9 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
+#include <nlohmann/json.hpp>
+#include "iceberg/json_internal.h"
#include "iceberg/partition_field.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
@@ -106,4 +108,54 @@ TEST(PartitionSpecTest, PartitionSchemaTest) {
EXPECT_EQ(pt_field2.name(), partition_schema.value()->fields()[1].name());
EXPECT_EQ(pt_field2.field_id(),
partition_schema.value()->fields()[1].field_id());
}
+
+TEST(PartitionSpecTest, PartitionTypeTest) {
+ nlohmann::json json = R"(
+ {
+ "spec-id": 1,
+ "fields": [ {
+ "source-id": 4,
+ "field-id": 1000,
+ "name": "ts_day",
+ "transform": "day"
+ }, {
+ "source-id": 1,
+ "field-id": 1001,
+ "name": "id_bucket",
+ "transform": "bucket[16]"
+ }, {
+ "source-id": 2,
+ "field-id": 1002,
+ "name": "id_truncate",
+ "transform": "truncate[4]"
+ } ]
+ })"_json;
+
+ SchemaField field1(1, "id", int32(), false);
+ SchemaField field2(2, "name", string(), false);
+ SchemaField field3(3, "ts", timestamp(), false);
+ SchemaField field4(4, "ts_day", timestamp(), false);
+ SchemaField field5(5, "id_bucket", int32(), false);
+ SchemaField field6(6, "id_truncate", int32(), false);
+ auto const schema = std::make_shared<Schema>(
+ std::vector<SchemaField>{field1, field2, field3, field4, field5, field6},
+ Schema::kInitialSchemaId);
+
+ auto parsed_spec_result = PartitionSpecFromJson(schema, json);
+ ASSERT_TRUE(parsed_spec_result.has_value()) <<
parsed_spec_result.error().message;
+
+ auto partition_schema = parsed_spec_result.value()->PartitionType();
+
+ SchemaField pt_field1(1000, "ts_day", date(), true);
+ SchemaField pt_field2(1001, "id_bucket", int32(), true);
+ SchemaField pt_field3(1002, "id_truncate", string(), true);
+
+ ASSERT_TRUE(partition_schema.has_value());
+ ASSERT_EQ(3, partition_schema.value()->fields().size());
+
+ EXPECT_EQ(pt_field1, partition_schema.value()->fields()[0]);
+ EXPECT_EQ(pt_field2, partition_schema.value()->fields()[1]);
+ EXPECT_EQ(pt_field3, partition_schema.value()->fields()[2]);
+}
+
} // namespace iceberg
diff --git a/src/iceberg/test/transform_test.cc
b/src/iceberg/test/transform_test.cc
index 90f7abb..79d2564 100644
--- a/src/iceberg/test/transform_test.cc
+++ b/src/iceberg/test/transform_test.cc
@@ -134,7 +134,7 @@ TEST(TransformResultTypeTest, PositiveCases) {
.expected_result_type = iceberg::int32()},
{.str = "day",
.source_type = iceberg::timestamp(),
- .expected_result_type = iceberg::int32()},
+ .expected_result_type = iceberg::date()},
{.str = "hour",
.source_type = iceberg::timestamp(),
.expected_result_type = iceberg::int32()},
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index e5a0823..d06e31f 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -202,6 +202,11 @@ class ICEBERG_EXPORT TransformFunction {
/// \brief Get the source type of transform function
const std::shared_ptr<Type>& source_type() const;
/// \brief Get the result type of transform function
+ ///
+ /// Note: This method defines both the physical and display representation
of the
+ /// partition field. The physical representation must conform to the Iceberg
spec. The
+ /// display representation can deviate from the spec, such as by
transforming the value
+ /// into a more human-readable format.
virtual std::shared_ptr<Type> ResultType() const = 0;
friend bool operator==(const TransformFunction& lhs, const
TransformFunction& rhs) {
diff --git a/src/iceberg/transform_function.cc
b/src/iceberg/transform_function.cc
index e2f5ece..9213d2c 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -193,7 +193,7 @@ Result<Literal> DayTransform::Transform(const Literal&
literal) {
return TemporalUtils::ExtractDay(literal);
}
-std::shared_ptr<Type> DayTransform::ResultType() const { return int32(); }
+std::shared_ptr<Type> DayTransform::ResultType() const { return date(); }
Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
std::shared_ptr<Type> const& source_type) {
diff --git a/src/iceberg/transform_function.h b/src/iceberg/transform_function.h
index fc0dd72..33712ca 100644
--- a/src/iceberg/transform_function.h
+++ b/src/iceberg/transform_function.h
@@ -141,7 +141,11 @@ class ICEBERG_EXPORT DayTransform : public
TransformFunction {
/// \brief Extract a date or timestamp day, as days from 1970-01-01.
Result<Literal> Transform(const Literal& literal) override;
- /// \brief Returns INT32 as the output type.
+ /// \brief Return the result type of a day transform.
+ ///
+ /// Note: The physical representation conforms to the Iceberg spec as
DateType is
+ /// internally converted to int. The DateType returned here provides a more
+ /// human-readable way to display the partition field.
std::shared_ptr<Type> ResultType() const override;
/// \brief Create a DayTransform.