pitrou commented on a change in pull request #11328:
URL: https://github.com/apache/arrow/pull/11328#discussion_r723343926
##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_string.cc
##########
@@ -105,6 +106,105 @@ struct TemporalToStringCastFunctor {
}
};
+template <typename O>
+struct TemporalToStringCastFunctor<O, TimestampType> {
+ using value_type = typename TypeTraits<TimestampType>::CType;
+ using BuilderType = typename TypeTraits<O>::BuilderType;
+ using FormatterType = StringFormatter<TimestampType>;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK(out->is_array());
+ const ArrayData& input = *batch[0].array();
+ ArrayData* output = out->mutable_array();
+ return Convert(ctx, input, output);
+ }
+
+ static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData*
output) {
+ const auto& timezone = GetInputTimezone(*input.type);
+ const auto& ty = checked_cast<const TimestampType&>(*input.type);
+ BuilderType builder(input.type, ctx->memory_pool());
+
+ // Preallocate
+ int64_t string_length = 19; // YYYY-MM-DD HH:MM:SS
+ if (ty.unit() == TimeUnit::MILLI) {
+ string_length += 4; // .SSS
+ } else if (ty.unit() == TimeUnit::MICRO) {
+ string_length += 7; // .SSSSSS
+ } else if (ty.unit() == TimeUnit::NANO) {
+ string_length += 10; // .SSSSSSSSS
+ }
+ if (!timezone.empty()) string_length += 5; // +0000
+ RETURN_NOT_OK(builder.Reserve(input.length));
+ RETURN_NOT_OK(
+ builder.ReserveData((input.length - input.GetNullCount()) *
string_length));
+
+ if (timezone.empty()) {
+ FormatterType formatter(input.type);
+ RETURN_NOT_OK(VisitArrayDataInline<TimestampType>(
+ input,
+ [&](value_type v) {
+ return formatter(v, [&](util::string_view v) { return
builder.Append(v); });
+ },
+ [&]() {
+ builder.UnsafeAppendNull();
+ return Status::OK();
+ }));
+ } else {
+#ifdef _WIN32
+ // TODO(ARROW-13168):
+ return Status::NotImplemented(
+ "Casting a timestamp with time zone to string is not yet supported
on "
+ "Windows.");
+#else
+ switch (ty.unit()) {
+ case TimeUnit::SECOND:
+ RETURN_NOT_OK(ConvertZoned<std::chrono::seconds>(input, timezone,
&builder));
+ break;
+ case TimeUnit::MILLI:
+ RETURN_NOT_OK(
+ ConvertZoned<std::chrono::milliseconds>(input, timezone,
&builder));
+ break;
+ case TimeUnit::MICRO:
+ RETURN_NOT_OK(
+ ConvertZoned<std::chrono::microseconds>(input, timezone,
&builder));
+ break;
+ case TimeUnit::NANO:
+ RETURN_NOT_OK(
+ ConvertZoned<std::chrono::nanoseconds>(input, timezone,
&builder));
+ break;
+ default:
+ DCHECK(false);
+ return Status::NotImplemented("Unimplemented time unit");
+ }
+#endif
+ }
+ std::shared_ptr<Array> output_array;
+ RETURN_NOT_OK(builder.Finish(&output_array));
+ *output = std::move(*output_array->data());
+ return Status::OK();
+ }
+
+ template <typename Duration>
+ static Status ConvertZoned(const ArrayData& input, const std::string&
timezone,
+ BuilderType* builder) {
+ static std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
Review comment:
Do we actually want to append the timezone and localize the timestamp?
Pretty-printing currently doesn't do this.
I don't know what the user expects here (we may actually need to make this
customizable at some point :-)).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]