This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ebe732  fix: result type of DayTransform should be date (#285)
4ebe732 is described below

commit 4ebe732cd3333deb90ac5a60739540b7d8f9eb6f
Author: Junwang Zhao <[email protected]>
AuthorDate: Tue Nov 4 10:09:43 2025 +0800

    fix: result type of DayTransform should be date (#285)
    
    Result type of Day transform should be `int` per the spec, but in the
    Java impl, it is `DateType`, this discrepancy has been discussed on
    devlist, see [1]. iceberg-iceberg and iceberg-rust all conform to the
    java impl, so make iceberg-cpp do the same.
    
    The added comments are borrowed from iceberg-python.
    
    [1] https://lists.apache.org/thread/2gq7b54nvc9q6f1j08l9lnzgm5onkmx5
---
 src/iceberg/test/partition_spec_test.cc | 52 +++++++++++++++++++++++++++++++++
 src/iceberg/test/transform_test.cc      |  2 +-
 src/iceberg/transform.h                 |  5 ++++
 src/iceberg/transform_function.cc       |  2 +-
 src/iceberg/transform_function.h        |  6 +++-
 5 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/src/iceberg/test/partition_spec_test.cc 
b/src/iceberg/test/partition_spec_test.cc
index 538d89a..d50b537 100644
--- a/src/iceberg/test/partition_spec_test.cc
+++ b/src/iceberg/test/partition_spec_test.cc
@@ -24,7 +24,9 @@
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include <nlohmann/json.hpp>
 
+#include "iceberg/json_internal.h"
 #include "iceberg/partition_field.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
@@ -106,4 +108,54 @@ TEST(PartitionSpecTest, PartitionSchemaTest) {
   EXPECT_EQ(pt_field2.name(), partition_schema.value()->fields()[1].name());
   EXPECT_EQ(pt_field2.field_id(), 
partition_schema.value()->fields()[1].field_id());
 }
+
+TEST(PartitionSpecTest, PartitionTypeTest) {
+  nlohmann::json json = R"(
+  {
+  "spec-id": 1,
+  "fields": [ {
+      "source-id": 4,
+      "field-id": 1000,
+      "name": "ts_day",
+      "transform": "day"
+      }, {
+      "source-id": 1,
+      "field-id": 1001,
+      "name": "id_bucket",
+      "transform": "bucket[16]"
+      }, {
+      "source-id": 2,
+      "field-id": 1002,
+      "name": "id_truncate",
+      "transform": "truncate[4]"
+      } ]
+  })"_json;
+
+  SchemaField field1(1, "id", int32(), false);
+  SchemaField field2(2, "name", string(), false);
+  SchemaField field3(3, "ts", timestamp(), false);
+  SchemaField field4(4, "ts_day", timestamp(), false);
+  SchemaField field5(5, "id_bucket", int32(), false);
+  SchemaField field6(6, "id_truncate", int32(), false);
+  auto const schema = std::make_shared<Schema>(
+      std::vector<SchemaField>{field1, field2, field3, field4, field5, field6},
+      Schema::kInitialSchemaId);
+
+  auto parsed_spec_result = PartitionSpecFromJson(schema, json);
+  ASSERT_TRUE(parsed_spec_result.has_value()) << 
parsed_spec_result.error().message;
+
+  auto partition_schema = parsed_spec_result.value()->PartitionType();
+
+  SchemaField pt_field1(1000, "ts_day", date(), true);
+  SchemaField pt_field2(1001, "id_bucket", int32(), true);
+  SchemaField pt_field3(1002, "id_truncate", string(), true);
+
+  ASSERT_TRUE(partition_schema.has_value());
+  ASSERT_EQ(3, partition_schema.value()->fields().size());
+
+  EXPECT_EQ(pt_field1, partition_schema.value()->fields()[0]);
+  EXPECT_EQ(pt_field2, partition_schema.value()->fields()[1]);
+  EXPECT_EQ(pt_field3, partition_schema.value()->fields()[2]);
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/test/transform_test.cc 
b/src/iceberg/test/transform_test.cc
index 90f7abb..79d2564 100644
--- a/src/iceberg/test/transform_test.cc
+++ b/src/iceberg/test/transform_test.cc
@@ -134,7 +134,7 @@ TEST(TransformResultTypeTest, PositiveCases) {
        .expected_result_type = iceberg::int32()},
       {.str = "day",
        .source_type = iceberg::timestamp(),
-       .expected_result_type = iceberg::int32()},
+       .expected_result_type = iceberg::date()},
       {.str = "hour",
        .source_type = iceberg::timestamp(),
        .expected_result_type = iceberg::int32()},
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index e5a0823..d06e31f 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -202,6 +202,11 @@ class ICEBERG_EXPORT TransformFunction {
   /// \brief Get the source type of transform function
   const std::shared_ptr<Type>& source_type() const;
   /// \brief Get the result type of transform function
+  ///
+  /// Note: This method defines both the physical and display representation 
of the
+  /// partition field. The physical representation must conform to the Iceberg 
spec. The
+  /// display representation can deviate from the spec, such as by 
transforming the value
+  /// into a more human-readable format.
   virtual std::shared_ptr<Type> ResultType() const = 0;
 
   friend bool operator==(const TransformFunction& lhs, const 
TransformFunction& rhs) {
diff --git a/src/iceberg/transform_function.cc 
b/src/iceberg/transform_function.cc
index e2f5ece..9213d2c 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -193,7 +193,7 @@ Result<Literal> DayTransform::Transform(const Literal& 
literal) {
   return TemporalUtils::ExtractDay(literal);
 }
 
-std::shared_ptr<Type> DayTransform::ResultType() const { return int32(); }
+std::shared_ptr<Type> DayTransform::ResultType() const { return date(); }
 
 Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
     std::shared_ptr<Type> const& source_type) {
diff --git a/src/iceberg/transform_function.h b/src/iceberg/transform_function.h
index fc0dd72..33712ca 100644
--- a/src/iceberg/transform_function.h
+++ b/src/iceberg/transform_function.h
@@ -141,7 +141,11 @@ class ICEBERG_EXPORT DayTransform : public 
TransformFunction {
   /// \brief Extract a date or timestamp day, as days from 1970-01-01.
   Result<Literal> Transform(const Literal& literal) override;
 
-  /// \brief Returns INT32 as the output type.
+  /// \brief Return the result type of a day transform.
+  ///
+  /// Note: The physical representation conforms to the Iceberg spec as 
DateType is
+  /// internally converted to int. The DateType returned here provides a more
+  /// human-readable way to display the partition field.
   std::shared_ptr<Type> ResultType() const override;
 
   /// \brief Create a DayTransform.

Reply via email to