This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a288364d97 GH-30117:  [C++][Python] Add "Z" to the end of timestamp 
print string when tz defined (#39272)
a288364d97 is described below

commit a288364d971ab9a6a3f05a903a5df83ebeddf0a0
Author: Alenka Frim <[email protected]>
AuthorDate: Mon Jan 8 14:26:13 2024 +0100

    GH-30117:  [C++][Python] Add "Z" to the end of timestamp print string when 
tz defined (#39272)
    
    ### What changes are included in this PR?
    
    This PR updates the PrettyPrint for the Timestamp type so that "Z" is 
printed at the end of the output string if a timezone has been defined. This 
way the output carries at least a minimal hint that the values are stored in UTC.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    There is a change in how `TimestampArray` prints out the data. With this 
change, "Z" is appended to the end of each printed timestamp string when the 
type has a timezone defined.
    * Closes: #30117
    
    Lead-authored-by: AlenkaF <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 cpp/src/arrow/pretty_print_test.cc         |  6 +++---
 cpp/src/arrow/util/formatting.h            |  7 ++++++-
 cpp/src/arrow/util/formatting_util_test.cc | 28 ++++++++++++++++++++++++++++
 python/pyarrow/tests/test_types.py         | 11 +++++++++++
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/pretty_print_test.cc 
b/cpp/src/arrow/pretty_print_test.cc
index 0db6ae4867..5d2256e8c5 100644
--- a/cpp/src/arrow/pretty_print_test.cc
+++ b/cpp/src/arrow/pretty_print_test.cc
@@ -350,10 +350,10 @@ TEST_F(TestPrettyPrint, DateTimeTypes) {
     std::vector<int64_t> values = {
         0, 1, 2, 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))), 
4};
     static const char* expected = R"expected([
-  1970-01-01 00:00:00.000000,
-  1970-01-01 00:00:00.000001,
+  1970-01-01 00:00:00.000000Z,
+  1970-01-01 00:00:00.000001Z,
   null,
-  1970-01-02 03:04:05.000678,
+  1970-01-02 03:04:05.000678Z,
   null
 ])expected";
     CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MICRO, 
"Transylvania"),
diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h
index 9dcc6463fb..71bae74629 100644
--- a/cpp/src/arrow/util/formatting.h
+++ b/cpp/src/arrow/util/formatting.h
@@ -470,7 +470,8 @@ class StringFormatter<TimestampType> {
   using value_type = int64_t;
 
   explicit StringFormatter(const DataType* type)
-      : unit_(checked_cast<const TimestampType&>(*type).unit()) {}
+      : unit_(checked_cast<const TimestampType&>(*type).unit()),
+        timezone_(checked_cast<const TimestampType&>(*type).timezone()) {}
 
   template <typename Duration, typename Appender>
   Return<Appender> operator()(Duration, value_type value, Appender&& append) {
@@ -503,6 +504,9 @@ class StringFormatter<TimestampType> {
     std::array<char, buffer_size> buffer;
     char* cursor = buffer.data() + buffer_size;
 
+    if (timezone_.size() > 0) {
+      detail::FormatOneChar('Z', &cursor);
+    }
     detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), 
&cursor);
     detail::FormatOneChar(' ', &cursor);
     detail::FormatYYYY_MM_DD(timepoint_days, &cursor);
@@ -516,6 +520,7 @@ class StringFormatter<TimestampType> {
 
  private:
   TimeUnit::type unit_;
+  std::string timezone_;
 };
 
 template <typename T>
diff --git a/cpp/src/arrow/util/formatting_util_test.cc 
b/cpp/src/arrow/util/formatting_util_test.cc
index 9afbc91063..13f57a495d 100644
--- a/cpp/src/arrow/util/formatting_util_test.cc
+++ b/cpp/src/arrow/util/formatting_util_test.cc
@@ -522,6 +522,34 @@ TEST(Formatting, Timestamp) {
     AssertFormatting(formatter, -2203932304LL * 1000000000LL + 8,
                      "1900-02-28 12:34:56.000000008");
   }
+
+  {
+    auto timestamp_types = {timestamp(TimeUnit::SECOND, "US/Eastern"),
+                            timestamp(TimeUnit::SECOND, "+01:00")};
+    for (auto ty : timestamp_types) {
+      StringFormatter<TimestampType> formatter(ty.get());
+
+      AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z");
+    }
+  }
+
+  {
+    auto ty = timestamp(TimeUnit::MILLI, "Pacific/Maruesas");
+    StringFormatter<TimestampType> formatter(ty.get());
+    AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000Z");
+  }
+
+  {
+    auto ty = timestamp(TimeUnit::MICRO, "-42:00");
+    StringFormatter<TimestampType> formatter(ty.get());
+    AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000Z");
+  }
+
+  {
+    auto ty = timestamp(TimeUnit::NANO, "Mars/Mariner_Valley");
+    StringFormatter<TimestampType> formatter(ty.get());
+    AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000000Z");
+  }
 }
 
 TEST(Formatting, Interval) {
diff --git a/python/pyarrow/tests/test_types.py 
b/python/pyarrow/tests/test_types.py
index 7600f1dd33..c8a52c6b62 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -487,6 +487,17 @@ def test_timestamp():
             pa.timestamp(invalid_unit)
 
 
+def test_timestamp_print():
+    for unit in ('s', 'ms', 'us', 'ns'):
+        for tz in ('UTC', 'Europe/Paris', 'Pacific/Marquesas',
+                   'Mars/Mariner_Valley', '-00:42', '+42:00'):
+            ty = pa.timestamp(unit, tz=tz)
+            arr = pa.array([0], ty)
+            assert "Z" in str(arr)
+        arr = pa.array([0], pa.timestamp(unit))
+        assert "Z" not in str(arr)
+
+
 def test_time32_units():
     for valid_unit in ('s', 'ms'):
         ty = pa.time32(valid_unit)

Reply via email to