jorisvandenbossche commented on a change in pull request #10647:
URL: https://github.com/apache/arrow/pull/10647#discussion_r670406295



##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -240,6 +243,15 @@ StrptimeOptions::StrptimeOptions(std::string format, 
TimeUnit::type unit)
 StrptimeOptions::StrptimeOptions() : StrptimeOptions("", TimeUnit::SECOND) {}
 constexpr char StrptimeOptions::kTypeName[];
 
+StrftimeOptions::StrftimeOptions(std::string format, std::string timezone)
+    : FunctionOptions(internal::kStrftimeOptionsType),
+      format(std::move(format)),
+      timezone(std::move(timezone)) {
+  tz = arrow_vendored::date::locate_zone(this->timezone);
+}
+StrftimeOptions::StrftimeOptions() : StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", 
"UTC") {}

Review comment:
       The default is set to UTC, but for timestamp with timezone input, can we 
default to using the type's timezone?
   
   Also for timestamp without timezone, it should probably not use `%z` by 
default?
   
   And see my other comment, I suppose the default should rather be a literal 
`Z` instead of `%z` (or `%Z`) ?

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -178,6 +179,20 @@ class ARROW_EXPORT StrptimeOptions : public 
FunctionOptions {
   TimeUnit::type unit;
 };
 
+class ARROW_EXPORT StrftimeOptions : public FunctionOptions {
+ public:
+  explicit StrftimeOptions(std::string format, std::string timezone);
+  StrftimeOptions();
+  constexpr static char const kTypeName[] = "StrftimeOptions";
+
+  /// The desired format string.
+  std::string format;
+  /// Timezone to output the time in.
+  std::string timezone;

Review comment:
       How does this keyword behave if the type already has a timezone (then a 
simple conversion I assume) or if the type has no time zone?
   
   (alternative could be to leave out this option and require users first to 
explicitly convert from one timezone to another (which is a cheap metadata only 
change) or localize timezone-naive data)

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -178,6 +179,20 @@ class ARROW_EXPORT StrptimeOptions : public 
FunctionOptions {
   TimeUnit::type unit;
 };
 
+class ARROW_EXPORT StrftimeOptions : public FunctionOptions {
+ public:
+  explicit StrftimeOptions(std::string format, std::string timezone);
+  StrftimeOptions();
+  constexpr static char const kTypeName[] = "StrftimeOptions";
+
+  /// The desired format string.
+  std::string format;
+  /// Timezone to output the time in.
+  std::string timezone;

Review comment:
       Although for local timestamp without timezone, requiring the user to 
first localize it to a timezone-aware type might give additional overhead, as 
`strftime` would then convert it back to a local timestamp?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -216,5 +216,40 @@ TEST_F(ScalarTemporalTest, DayOfWeek) {
                 DayOfWeek(timestamps, 
DayOfWeekOptions(/*one_based_numbering=*/false,
                                                        /*week_start=*/8)));
 }
+
+#ifndef _WIN32
+TEST_F(ScalarTemporalTest, Strftime) {
+  auto options_seconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", "UTC");
+  auto options_milliseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", "GMT");
+  auto options_microseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", 
"Asia/Kolkata");
+  auto options_nanoseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", 
"US/Hawaii");
+
+  const char* times_seconds = R"(["1970-01-01T00:00:59", null])";
+  const char* times_milliseconds = R"(["1970-01-01T00:00:59.123", null])";
+  const char* times_microseconds = R"(["1970-01-01T00:00:59.123456", null])";
+  const char* times_nanoseconds = R"(["1970-01-01T00:00:59.123456789", null])";
+
+  const char* zoned_seconds = R"(["1970-01-01T00:00:59+0000", null])";
+  const char* zoned_milliseconds = R"(["1970-01-01T00:00:59.123+0000", null])";

Review comment:
       With a default format of `%S`, I don't expect sub-second values to be 
shown (since there is a `%f` for this) ?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -321,6 +322,82 @@ struct Nanosecond {
   }
 };
 
+// ----------------------------------------------------------------------
+// Convert timestamps to a string representation with an arbitrary format
+
+template <typename Duration>
+inline std::string get_timestamp(int64_t arg, const StrftimeOptions* options) {
+  auto zt = arrow_vendored::date::zoned_time<Duration>{options->tz,
+                                                       
sys_time<Duration>(Duration{arg})};
+  return arrow_vendored::date::format(options->format, zt);
+}
+
+// If source timestamp is timezone naive we do not print timezone info
+inline StrftimeOptions get_options(KernelContext* ctx, const bool 
timezone_known) {
+  const StrftimeOptions options = StrftimeState::Get(ctx);
+  if (timezone_known) {
+    return options;
+  }
+
+  std::string new_format = std::move(options.format);
+  for (std::string str : {"%z", "%Z"}) {
+    size_t pos = std::string::npos;
+    while ((pos = new_format.find(str)) != std::string::npos) {
+      new_format.erase(pos, str.length());
+    }
+  }
+  return StrftimeOptions(new_format, options.timezone);
+}
+
+template <typename Duration>
+struct Strftime {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const auto& timezone = GetInputTimezone(in);
+    const StrftimeOptions options = get_options(ctx, !timezone.empty());
+    if (timezone.empty() && options.timezone != "UTC") {
+      return Status::Invalid("Timezone naive timestamp can only be printed in 
UTC. Got: ",

Review comment:
       Why allow UTC here (and not other timezones, or disallow any timezone 
including UTC) ?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -216,5 +216,40 @@ TEST_F(ScalarTemporalTest, DayOfWeek) {
                 DayOfWeek(timestamps, 
DayOfWeekOptions(/*one_based_numbering=*/false,
                                                        /*week_start=*/8)));
 }
+
+#ifndef _WIN32
+TEST_F(ScalarTemporalTest, Strftime) {
+  auto options_seconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", "UTC");
+  auto options_milliseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", "GMT");
+  auto options_microseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", 
"Asia/Kolkata");
+  auto options_nanoseconds = StrftimeOptions("%Y-%m-%dT%H:%M:%S%z", 
"US/Hawaii");
+
+  const char* times_seconds = R"(["1970-01-01T00:00:59", null])";
+  const char* times_milliseconds = R"(["1970-01-01T00:00:59.123", null])";
+  const char* times_microseconds = R"(["1970-01-01T00:00:59.123456", null])";
+  const char* times_nanoseconds = R"(["1970-01-01T00:00:59.123456789", null])";
+
+  const char* zoned_seconds = R"(["1970-01-01T00:00:59+0000", null])";
+  const char* zoned_milliseconds = R"(["1970-01-01T00:00:59.123+0000", null])";

Review comment:
       I see you commented about this in the python/pandas tests below as well. 
   Is this the default behaviour of `date.h` (which seems a strange deviation 
from the standard?)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to