This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 0f44ce26 feat: add utility to print transformed value as human string 
(#501)
0f44ce26 is described below

commit 0f44ce26e69780e64f1fa4c873782eab2d125d8b
Author: wzhuo <[email protected]>
AuthorDate: Mon Jan 12 13:09:59 2026 +0800

    feat: add utility to print transformed value as human string (#501)
---
 src/iceberg/CMakeLists.txt              |   1 +
 src/iceberg/meson.build                 |   1 +
 src/iceberg/test/CMakeLists.txt         |   1 +
 src/iceberg/test/meson.build            |   1 +
 src/iceberg/test/transform_util_test.cc | 160 ++++++++++++++++++++++++++++++++
 src/iceberg/util/meson.build            |   1 +
 src/iceberg/util/transform_util.cc      | 144 ++++++++++++++++++++++++++++
 src/iceberg/util/transform_util.h       | 105 +++++++++++++++++++++
 8 files changed, 414 insertions(+)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 2ecd652f..47a2cabc 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -95,6 +95,7 @@ set(ICEBERG_SOURCES
     util/snapshot_util.cc
     util/temporal_util.cc
     util/timepoint.cc
+    util/transform_util.cc
     util/truncate_util.cc
     util/type_util.cc
     util/url_encoder.cc
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 78ebd604..1a59c894 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -116,6 +116,7 @@ iceberg_sources = files(
     'util/snapshot_util.cc',
     'util/temporal_util.cc',
     'util/timepoint.cc',
+    'util/transform_util.cc',
     'util/truncate_util.cc',
     'util/type_util.cc',
     'util/url_encoder.cc',
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 4e86576e..0e41fcfb 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -111,6 +111,7 @@ add_iceberg_test(util_test
                  formatter_test.cc
                  location_util_test.cc
                  string_util_test.cc
+                 transform_util_test.cc
                  truncate_util_test.cc
                  url_encoder_test.cc
                  uuid_test.cc
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 50422ccc..e15a7a9e 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -87,6 +87,7 @@ iceberg_tests = {
             'formatter_test.cc',
             'location_util_test.cc',
             'string_util_test.cc',
+            'transform_util_test.cc',
             'truncate_util_test.cc',
             'url_encoder_test.cc',
             'uuid_test.cc',
diff --git a/src/iceberg/test/transform_util_test.cc 
b/src/iceberg/test/transform_util_test.cc
new file mode 100644
index 00000000..76f6824b
--- /dev/null
+++ b/src/iceberg/test/transform_util_test.cc
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/transform_util.h"
+
+#include <gtest/gtest.h>
+
+namespace iceberg {
+
+TEST(TransformUtilTest, HumanYear) {  // inputs are year offsets relative to 1970
+  EXPECT_EQ("1970", TransformUtil::HumanYear(0));
+  EXPECT_EQ("1971", TransformUtil::HumanYear(1));
+  EXPECT_EQ("1969", TransformUtil::HumanYear(-1));
+  EXPECT_EQ("0999", TransformUtil::HumanYear(999 - 1970));  // padded to four digits
+  EXPECT_EQ("2026", TransformUtil::HumanYear(56));
+}
+
+TEST(TransformUtilTest, HumanMonth) {
+  // 0 is January 1970
+  EXPECT_EQ("1970-01", TransformUtil::HumanMonth(0));
+  // 1 is February 1970
+  EXPECT_EQ("1970-02", TransformUtil::HumanMonth(1));
+  // -1 is December 1969
+  EXPECT_EQ("1969-12", TransformUtil::HumanMonth(-1));
+  // -11641 is December 0999 (year is padded to four digits)
+  EXPECT_EQ("0999-12", TransformUtil::HumanMonth(-11641));
+  // 12 is January 1971
+  EXPECT_EQ("1971-01", TransformUtil::HumanMonth(12));
+  // 672 (56 * 12) is January 2026
+  EXPECT_EQ("2026-01", TransformUtil::HumanMonth(672));
+}
+
+TEST(TransformUtilTest, HumanDay) {
+  // 0 is the Unix epoch day (1970-01-01)
+  EXPECT_EQ("1970-01-01", TransformUtil::HumanDay(0));
+  // 1 is 1970-01-02
+  EXPECT_EQ("1970-01-02", TransformUtil::HumanDay(1));
+  // -1 is 1969-12-31
+  EXPECT_EQ("1969-12-31", TransformUtil::HumanDay(-1));
+  // -354286 is 0999-12-31 (year is padded to four digits)
+  EXPECT_EQ("0999-12-31", TransformUtil::HumanDay(-354286));
+  // 365 is 1971-01-01 (1970 is not a leap year)
+  EXPECT_EQ("1971-01-01", TransformUtil::HumanDay(365));
+  // 20454 is 2026-01-01
+  EXPECT_EQ("2026-01-01", TransformUtil::HumanDay(20454));
+}
+
+TEST(TransformUtilTest, HumanHour) {
+  // 0 is the Unix epoch day at 00:00
+  EXPECT_EQ("1970-01-01-00", TransformUtil::HumanHour(0));
+  // 1 is 01:00 on the epoch day
+  EXPECT_EQ("1970-01-01-01", TransformUtil::HumanHour(1));
+  // -1 is the last hour of the previous day
+  EXPECT_EQ("1969-12-31-23", TransformUtil::HumanHour(-1));
+  // -8502841 is 0999-12-31 at 23:00
+  EXPECT_EQ("0999-12-31-23", TransformUtil::HumanHour(-8502841));
+  // 24 is the next day at 00:00
+  EXPECT_EQ("1970-01-02-00", TransformUtil::HumanHour(24));
+  // 490896 is 2026-01-01 at 00:00
+  EXPECT_EQ("2026-01-01-00", TransformUtil::HumanHour(490896));
+}
+
+TEST(TransformUtilTest, HumanTime) {
+  // Midnight: zero seconds are omitted, leaving HH:mm
+  EXPECT_EQ("00:00", TransformUtil::HumanTime(0));
+  // 1 second after midnight
+  EXPECT_EQ("00:00:01", TransformUtil::HumanTime(1000000));
+  // 1.5 seconds after midnight (millisecond precision)
+  EXPECT_EQ("00:00:01.500", TransformUtil::HumanTime(1500000));
+  // 1.001 seconds after midnight (millisecond precision)
+  EXPECT_EQ("00:00:01.001", TransformUtil::HumanTime(1001000));
+  // 1.000001 seconds after midnight (microsecond precision)
+  EXPECT_EQ("00:00:01.000001", TransformUtil::HumanTime(1000001));
+  // 1 hour, 2 minutes, 3 seconds
+  EXPECT_EQ("01:02:03", TransformUtil::HumanTime(3723000000));
+  // 23:59:59, the last whole second of the day
+  EXPECT_EQ("23:59:59", TransformUtil::HumanTime(86399000000));
+}
+
+TEST(TransformUtilTest, HumanTimestamp) {
+  // Unix epoch
+  EXPECT_EQ("1970-01-01T00:00:00", TransformUtil::HumanTimestamp(0));
+  // 1 second after epoch
+  EXPECT_EQ("1970-01-01T00:00:01", TransformUtil::HumanTimestamp(1000000));
+  // 1 second before epoch
+  EXPECT_EQ("1969-12-31T23:59:59", TransformUtil::HumanTimestamp(-1000000));
+  // 0999-12-31T23:59:59 (year is padded to four digits)
+  EXPECT_EQ("0999-12-31T23:59:59", 
TransformUtil::HumanTimestamp(-30610224001000000L));
+  // precision with 500 milliseconds
+  EXPECT_EQ("2026-01-01T00:00:01.500", 
TransformUtil::HumanTimestamp(1767225601500000L));
+  // precision with 1 millisecond
+  EXPECT_EQ("2026-01-01T00:00:01.001", 
TransformUtil::HumanTimestamp(1767225601001000L));
+  // precision with 1 microsecond
+  EXPECT_EQ("2026-01-01T00:00:01.000001",
+            TransformUtil::HumanTimestamp(1767225601000001L));
+}
+
+TEST(TransformUtilTest, HumanTimestampWithZone) {
+  // Unix epoch
+  EXPECT_EQ("1970-01-01T00:00:00+00:00", 
TransformUtil::HumanTimestampWithZone(0));
+  // 1 second after epoch
+  EXPECT_EQ("1970-01-01T00:00:01+00:00", 
TransformUtil::HumanTimestampWithZone(1000000));
+  // 1 second before epoch
+  EXPECT_EQ("1969-12-31T23:59:59+00:00", 
TransformUtil::HumanTimestampWithZone(-1000000));
+  // 0999-12-31T23:59:59 (year is padded to four digits)
+  EXPECT_EQ("0999-12-31T23:59:59+00:00",
+            TransformUtil::HumanTimestampWithZone(-30610224001000000L));
+  // precision with 500 milliseconds
+  EXPECT_EQ("2026-01-01T00:00:01.500+00:00",
+            TransformUtil::HumanTimestampWithZone(1767225601500000L));
+  // precision with 1 millisecond
+  EXPECT_EQ("2026-01-01T00:00:01.001+00:00",
+            TransformUtil::HumanTimestampWithZone(1767225601001000L));
+  // precision with 1 microsecond
+  EXPECT_EQ("2026-01-01T00:00:01.000001+00:00",
+            TransformUtil::HumanTimestampWithZone(1767225601000001L));
+}
+
+TEST(TransformUtilTest, Base64Encode) {
+  // Empty string
+  EXPECT_EQ("", TransformUtil::Base64Encode(""));
+
+  // One-, two- and three-byte inputs (two, one and zero padding characters)
+  EXPECT_EQ("YQ==", TransformUtil::Base64Encode("a"));
+  EXPECT_EQ("YWI=", TransformUtil::Base64Encode("ab"));
+  EXPECT_EQ("YWJj", TransformUtil::Base64Encode("abc"));
+
+  // Five- and six-byte inputs (one and zero padding characters)
+  EXPECT_EQ("YWJjZGU=", TransformUtil::Base64Encode("abcde"));
+  EXPECT_EQ("YWJjZGVm", TransformUtil::Base64Encode("abcdef"));
+
+  // Common strings
+  EXPECT_EQ("U29tZSBkYXRhIHdpdGggY2hhcmFjdGVycw==",
+            TransformUtil::Base64Encode("Some data with characters"));
+  EXPECT_EQ("aGVsbG8=", TransformUtil::Base64Encode("hello"));
+  EXPECT_EQ("dGVzdCBzdHJpbmc=", TransformUtil::Base64Encode("test string"));
+
+  // Non-ASCII bytes (a four-byte UTF-8 sequence)
+  EXPECT_EQ("8J+EgA==", TransformUtil::Base64Encode("\xF0\x9F\x84\x80"));
+  // Embedded NUL byte, passed with an explicit length of 1
+  EXPECT_EQ("AA==", TransformUtil::Base64Encode({"\x00", 1}));
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index b3866b70..95952bb8 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -36,6 +36,7 @@ install_headers(
         'string_util.h',
         'temporal_util.h',
         'timepoint.h',
+        'transform_util.h',
         'truncate_util.h',
         'type_util.h',
         'url_encoder.h',
diff --git a/src/iceberg/util/transform_util.cc 
b/src/iceberg/util/transform_util.cc
new file mode 100644
index 00000000..fe152343
--- /dev/null
+++ b/src/iceberg/util/transform_util.cc
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/transform_util.h"
+
+#include <array>
+#include <chrono>
+#include <cstdint>
+#include <format>
+#include <string>
+#include <string_view>
+
+namespace iceberg {
+
+namespace {
+/// Anchor date (1970-01-01) that the year, month and day ordinals are added to.
+constexpr std::chrono::year_month_day kEpochDate{
+    std::chrono::year{1970}, std::chrono::January, std::chrono::day{1}};
+/// Number of microseconds in one millisecond.
+constexpr int64_t kMicrosPerMillis = 1'000;
+/// Number of microseconds in one second.
+constexpr int64_t kMicrosPerSecond = 1'000 * kMicrosPerMillis;
+}  // namespace
+
+/// Formats a year ordinal (an offset in years from the 1970-01-01 anchor) as "yyyy".
+std::string TransformUtil::HumanYear(int32_t year_ordinal) {
+  // Shift the anchor date by whole years, then print only its year field.
+  const std::chrono::years offset{year_ordinal};
+  return std::format("{:%Y}", kEpochDate + offset);
+}
+
+/// Formats a month ordinal (an offset in months from the 1970-01-01 anchor) as "yyyy-MM".
+std::string TransformUtil::HumanMonth(int32_t month_ordinal) {
+  // Calendar arithmetic on the anchor date carries month overflow into the year.
+  const std::chrono::months offset{month_ordinal};
+  return std::format("{:%Y-%m}", kEpochDate + offset);
+}
+
+/// Formats a day ordinal (an offset in days from the 1970-01-01 anchor) as "yyyy-MM-dd".
+std::string TransformUtil::HumanDay(int32_t day_ordinal) {
+  // Convert the anchor to a day-resolution time point so day arithmetic is linear.
+  const std::chrono::sys_days epoch_day{kEpochDate};
+  return std::format("{:%F}", epoch_day + std::chrono::days{day_ordinal});
+}
+
+/// Formats an hour ordinal (hours counted from the system_clock epoch) as "yyyy-MM-dd-HH".
+std::string TransformUtil::HumanHour(int32_t hour_ordinal) {
+  // An hour-resolution system_clock time point built directly from the ordinal.
+  const std::chrono::sys_time<std::chrono::hours> hour_point{
+      std::chrono::hours{hour_ordinal}};
+  return std::format("{:%F-%H}", hour_point);
+}
+
+/// Renders a microsecond time-of-day using the shortest of HH:mm, HH:mm:ss,
+/// HH:mm:ss.SSS or HH:mm:ss.SSSSSS that drops only zero-valued components.
+std::string TransformUtil::HumanTime(int64_t micros_from_midnight) {
+  const int64_t whole_seconds = micros_from_midnight / kMicrosPerSecond;
+  const int64_t fraction = micros_from_midnight % kMicrosPerSecond;
+  const std::chrono::hh_mm_ss<std::chrono::seconds> clock{
+      std::chrono::seconds{whole_seconds}};
+
+  if (fraction != 0) {
+    // Three digits suffice when the fraction is a whole number of milliseconds.
+    if (fraction % kMicrosPerMillis == 0) {
+      return std::format("{:%T}.{:03d}", clock, fraction / kMicrosPerMillis);
+    }
+    return std::format("{:%T}.{:06d}", clock, fraction);
+  }
+
+  // No sub-second part: also drop the seconds field when it is zero.
+  if (clock.seconds().count() == 0) {
+    return std::format("{:%R}", clock);
+  }
+  return std::format("{:%T}", clock);
+}
+
+/// Renders a microsecond timestamp as yyyy-MM-ddTHH:mm:ss, adding a three-digit
+/// (millisecond) or six-digit (microsecond) fraction only when it is non-zero.
+///
+/// \param timestamp_micros microseconds relative to the system_clock epoch; may be
+///        negative.
+/// \return the formatted timestamp, without any zone suffix.
+std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) {
+  // Integer division truncates toward zero, so a negative input with a sub-second
+  // part would yield a negative remainder and a second that is one too late
+  // (e.g. -1 would print as "1970-01-01T00:00:00.-00001"). Borrow one second so
+  // the split is a true floor and the fraction always lies in [0, 1s).
+  int64_t seconds = timestamp_micros / kMicrosPerSecond;
+  int64_t micros = timestamp_micros % kMicrosPerSecond;
+  if (micros < 0) {
+    --seconds;
+    micros += kMicrosPerSecond;
+  }
+
+  const std::chrono::sys_seconds tp{std::chrono::seconds{seconds}};
+  if (micros == 0) {
+    return std::format("{:%FT%T}", tp);
+  }
+  if (micros % kMicrosPerMillis == 0) {
+    return std::format("{:%FT%T}.{:03d}", tp, micros / kMicrosPerMillis);
+  }
+  return std::format("{:%FT%T}.{:06d}", tp, micros);
+}
+
+/// Renders a microsecond timestamp as yyyy-MM-ddTHH:mm:ss followed by a fixed
+/// "+00:00" offset, adding a three-digit (millisecond) or six-digit (microsecond)
+/// fraction before the offset only when it is non-zero.
+///
+/// \param timestamp_micros microseconds relative to the system_clock epoch; may be
+///        negative.
+/// \return the formatted timestamp with the "+00:00" suffix.
+std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) {
+  // Integer division truncates toward zero, so a negative input with a sub-second
+  // part would yield a negative remainder and a second that is one too late
+  // (e.g. -1 would print as "1970-01-01T00:00:00.-00001+00:00"). Borrow one second
+  // so the split is a true floor and the fraction always lies in [0, 1s).
+  int64_t seconds = timestamp_micros / kMicrosPerSecond;
+  int64_t micros = timestamp_micros % kMicrosPerSecond;
+  if (micros < 0) {
+    --seconds;
+    micros += kMicrosPerSecond;
+  }
+
+  const std::chrono::sys_seconds tp{std::chrono::seconds{seconds}};
+  if (micros == 0) {
+    return std::format("{:%FT%T}+00:00", tp);
+  }
+  if (micros % kMicrosPerMillis == 0) {
+    return std::format("{:%FT%T}.{:03d}+00:00", tp, micros / kMicrosPerMillis);
+  }
+  return std::format("{:%FT%T}.{:06d}+00:00", tp, micros);
+}
+
+/// Encodes the bytes of `str_to_encode` with the "A-Za-z0-9+/" alphabet, padding the
+/// output with '=' so its length is a multiple of four.
+std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
+  static constexpr std::string_view kBase64Chars =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  const std::size_t total = str_to_encode.size();
+  std::string encoded;
+  encoded.reserve((total + 2) * 4 / 3);
+
+  // Every complete 3-byte group maps to exactly four output characters.
+  std::size_t pos = 0;
+  for (; pos + 3 <= total; pos += 3) {
+    const auto b0 = static_cast<unsigned char>(str_to_encode[pos]);
+    const auto b1 = static_cast<unsigned char>(str_to_encode[pos + 1]);
+    const auto b2 = static_cast<unsigned char>(str_to_encode[pos + 2]);
+    encoded += kBase64Chars[b0 >> 2];
+    encoded += kBase64Chars[((b0 & 0x03) << 4) | (b1 >> 4)];
+    encoded += kBase64Chars[((b1 & 0x0f) << 2) | (b2 >> 6)];
+    encoded += kBase64Chars[b2 & 0x3f];
+  }
+
+  // A trailing group of one or two bytes is treated as zero-filled; only the
+  // characters that carry real input bits are emitted, the rest become '='.
+  const std::size_t tail = total - pos;
+  if (tail != 0) {
+    const auto b0 = static_cast<unsigned char>(str_to_encode[pos]);
+    const unsigned char b1 =
+        tail == 2 ? static_cast<unsigned char>(str_to_encode[pos + 1]) : 0;
+    encoded += kBase64Chars[b0 >> 2];
+    encoded += kBase64Chars[((b0 & 0x03) << 4) | (b1 >> 4)];
+    if (tail == 2) {
+      encoded += kBase64Chars[(b1 & 0x0f) << 2];
+    } else {
+      encoded += '=';
+    }
+    encoded += '=';
+  }
+
+  return encoded;
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/util/transform_util.h 
b/src/iceberg/util/transform_util.h
new file mode 100644
index 00000000..7482b0db
--- /dev/null
+++ b/src/iceberg/util/transform_util.h
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "iceberg/iceberg_export.h"
+
+namespace iceberg {
+
+/// \brief Static helpers that render transform results as human-readable strings.
+class ICEBERG_EXPORT TransformUtil {
+ public:
+  /// \brief Returns a human-readable string for a year ordinal.
+  ///
+  /// The string is formatted as "yyyy".
+  ///
+  /// \param year_ordinal The number of years relative to 1970 (0 is 1970, -1 is 1969).
+  /// \return A human-readable string for the year.
+  static std::string HumanYear(int32_t year_ordinal);
+
+  /// \brief Returns a human-readable string for a month ordinal.
+  ///
+  /// The string is formatted as "yyyy-MM".
+  ///
+  /// \param month_ordinal The number of months relative to January 1970 (0 is 1970-01,
+  ///        -1 is 1969-12).
+  /// \return A human-readable string for the month.
+  static std::string HumanMonth(int32_t month_ordinal);
+
+  /// \brief Returns a human-readable string for the given day ordinal.
+  ///
+  /// The string is formatted as: `yyyy-MM-dd`.
+  ///
+  /// \param day_ordinal The number of days relative to 1970-01-01.
+  /// \return A human-readable string for the given day ordinal.
+  static std::string HumanDay(int32_t day_ordinal);
+
+  /// \brief Returns a human-readable string for the given hour ordinal.
+  ///
+  /// The string is formatted as: `yyyy-MM-dd-HH`.
+  ///
+  /// \param hour_ordinal The number of hours relative to 1970-01-01 at 00:00.
+  /// \return A human-readable string for the given hour ordinal.
+  static std::string HumanHour(int32_t hour_ordinal);
+
+  /// \brief Outputs a time of day as a string, such as 10:15.
+  ///
+  /// The output will be one of the following ISO-8601 formats:
+  ///  - HH:mm
+  ///  - HH:mm:ss
+  ///  - HH:mm:ss.SSS
+  ///  - HH:mm:ss.SSSSSS
+  ///
+  /// The format used will be the shortest that outputs the full value of the time,
+  /// where the omitted parts are implied to be zero.
+  ///
+  /// \param micros_from_midnight The time in microseconds from midnight.
+  /// \return A string representation of this time.
+  static std::string HumanTime(int64_t micros_from_midnight);
+
+  /// \brief Returns a string representation of a timestamp in microseconds.
+  ///
+  /// The output will be one of the following forms, according to the precision of
+  /// the timestamp:
+  ///  - yyyy-MM-ddTHH:mm:ss
+  ///  - yyyy-MM-ddTHH:mm:ss.SSS
+  ///  - yyyy-MM-ddTHH:mm:ss.SSSSSS
+  ///
+  /// \param timestamp_micros The timestamp in microseconds relative to
+  ///        1970-01-01T00:00:00.
+  /// \return A string representation of this timestamp.
+  static std::string HumanTimestamp(int64_t timestamp_micros);
+
+  /// \brief Returns a human-readable string representation of a timestamp with a
+  /// time zone offset.
+  ///
+  /// The offset is always written as "+00:00". The output will be one of the
+  /// following forms, according to the precision of the timestamp:
+  ///  - yyyy-MM-ddTHH:mm:ss+00:00
+  ///  - yyyy-MM-ddTHH:mm:ss.SSS+00:00
+  ///  - yyyy-MM-ddTHH:mm:ss.SSSSSS+00:00
+  ///
+  /// \param timestamp_micros The timestamp in microseconds relative to
+  ///        1970-01-01T00:00:00.
+  /// \return A string representation of this timestamp.
+  static std::string HumanTimestampWithZone(int64_t timestamp_micros);
+
+  /// \brief Base64-encodes a sequence of bytes.
+  ///
+  /// Uses the "A-Za-z0-9+/" alphabet and pads the result with '='.
+  ///
+  /// \param str_to_encode The bytes to encode; embedded NUL bytes are encoded like
+  ///        any other byte.
+  /// \return The Base64 text, or an empty string for empty input.
+  static std::string Base64Encode(std::string_view str_to_encode);
+};
+
+}  // namespace iceberg

Reply via email to