rok commented on a change in pull request #12464:
URL: https://github.com/apache/arrow/pull/12464#discussion_r830104333
##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -1143,6 +1145,110 @@ struct Strftime {
};
#endif
+// ----------------------------------------------------------------------
+// Convert string representations of timestamps in arbitrary format to timestamps
+
+const std::string GetZone(const std::string& format) {
+ // Check for use of %z (only lowercase %z is detected below; %Z is not checked)
+ size_t cur = 0;
+ std::string zone = "";
+ while (cur < format.size() - 1) {
+ if (format[cur] == '%') {
+ if (format[cur + 1] == 'z') {
+ zone = "UTC";
+ break;
+ }
+ cur++;
+ }
+ cur++;
+ }
+ return zone;
+}
+
+template <typename Duration, typename InType>
+struct Strptime {
+ const std::shared_ptr<TimestampParser> parser;
+ const TimeUnit::type unit;
+ const std::string zone;
+ const bool error_is_null;
+
+ static Result<Strptime> Make(KernelContext* ctx, const DataType& type) {
+ const StrptimeOptions& options = StrptimeState::Get(ctx);
+
+ return Strptime{TimestampParser::MakeStrptime(options.format),
+ std::move(options.unit), GetZone(options.format),
+ options.error_is_null};
+ }
+
+ static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+ ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+
+ if (in.is_valid) {
+ auto s = internal::UnboxScalar<InType>::Unbox(in);
+ int64_t result;
+ if ((*self.parser)(s.data(), s.size(), self.unit, &result)) {
+ *checked_cast<TimestampScalar*>(out) =
+ TimestampScalar(result, timestamp(self.unit, self.zone));
+ } else {
+ if (self.error_is_null) {
+ out->is_valid = false;
+ } else {
+ return Status::Invalid("Failed to parse string: '", s.data(),
+ "' as a scalar of type ",
+ TimestampType(self.unit).ToString());
+ }
+ }
+ } else {
+ out->is_valid = false;
+ }
+ return Status::OK();
+ }
+
+ static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+ ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+ int64_t* out_data = out->GetMutableValues<int64_t>(1);
+
+ if (self.error_is_null) {
+ auto visit_null = [&]() { out_data++; };
+ auto visit_value = [&](util::string_view s) {
+ int64_t result;
+ if ((*self.parser)(s.data(), s.size(), self.unit, &result)) {
+ *out_data++ = result;
+ }
+ };
+ VisitArrayDataInline<InType>(in, visit_value, visit_null);
+ } else {
+ auto visit_null = [&]() {
+ out_data++;
+ return Status::OK();
+ };
+ auto visit_value = [&](util::string_view s) {
+ int64_t result;
+ if ((*self.parser)(s.data(), s.size(), self.unit, &result)) {
+ *out_data++ = result;
+ return Status::OK();
+ } else {
+ return Status::Invalid("Failed to parse string: '", s.data(),
+ "' as a scalar of type ",
+ TimestampType(self.unit).ToString());
+ }
+ };
+ RETURN_NOT_OK(VisitArrayDataInline<InType>(in, visit_value, visit_null));
+ }
Review comment:
Does this look right @lidavidm @pitrou? This test is still failing:
```
StrptimeOptions options("%d/%m/%Y", TimeUnit::MICRO, /*error_is_null=*/true);
std::string input2 = R"(["5/1/2020", "AA/BB/CCCC", "AA/BB/CCCC",
"AA/BB/CCCC", null])";
std::string output2 = R"(["2020-01-05", null, null, null, null])";
this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO), output2,
&options);
```
With:
```
Got:
[
[
2020-01-05 00:00:00.000000,
1970-01-01 00:00:00.000000,
1970-01-01 00:00:00.000000,
1970-01-01 00:00:00.000000,
null
]
]
Expected:
[
[
2020-01-05 00:00:00.000000
],
[
null,
null,
null,
null
]
]
```
I suppose I need to adjust the null mask as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]