lidavidm commented on a change in pull request #12464:
URL: https://github.com/apache/arrow/pull/12464#discussion_r832163311



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -1143,6 +1145,117 @@ struct Strftime {
 };
 #endif
 
+// ----------------------------------------------------------------------
+// Convert string representations of timestamps in arbitrary format to timestamps
+
// Determine the time zone implied by a strptime-style format string.
//
// Returns "UTC" when `format` contains the `%z` directive, and an empty
// string otherwise. Only lowercase `%z` is recognised; `%Z` is not detected.
// A `%` always consumes the character that follows it, so an escaped percent
// sign followed by a literal 'z' ("%%z") is not mistaken for `%z`.
//
// \param format the strptime-style format string to inspect (may be empty)
// \return "UTC" if `%z` is present, "" otherwise
static std::string GetZone(const std::string& format) {
  std::string zone = "";
  // Test `cur + 1 < size()` rather than `cur < size() - 1`: size() is
  // unsigned, so the subtraction wraps around for an empty format and the
  // loop would read far past the end of the string.
  for (size_t cur = 0; cur + 1 < format.size(); ++cur) {
    if (format[cur] != '%') {
      continue;
    }
    if (format[cur + 1] == 'z') {
      zone = "UTC";
      break;
    }
    // Skip the directive character that belongs to this '%'.
    ++cur;
  }
  return zone;
}
+
+template <typename Duration, typename InType>
+struct Strptime {
+  const std::shared_ptr<TimestampParser> parser;
+  const TimeUnit::type unit;
+  const std::string zone;
+  const bool raise_errors;
+
+  static Result<Strptime> Make(KernelContext* ctx, const DataType& type) {
+    const StrptimeOptions& options = StrptimeState::Get(ctx);
+
+    return Strptime{TimestampParser::MakeStrptime(options.format),
+                    std::move(options.unit), GetZone(options.format),
+                    options.raise_errors};
+  }
+
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+
+    if (in.is_valid) {
+      auto s = internal::UnboxScalar<InType>::Unbox(in);
+      int64_t result;
+      if ((*self.parser)(s.data(), s.size(), self.unit, &result)) {
+        *checked_cast<TimestampScalar*>(out) =
+            TimestampScalar(result, timestamp(self.unit, self.zone));
+      } else {
+        if (self.raise_errors) {
+          return Status::Invalid("Failed to parse string: '", s.data(),
+                                 "' as a scalar of type ",
+                                 TimestampType(self.unit).ToString());
+        } else {
+          out->is_valid = false;
+        }
+      }
+    } else {
+      out->is_valid = false;
+    }
+    return Status::OK();
+  }
+
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+
+    std::unique_ptr<ArrayBuilder> array_builder;

Review comment:
       Looks good, though: if the kernel is set to null_handling = 
INTERSECTION, then instead of setting/clearing every individual bit using the 
writer, we can just manually clear the bits for slots that fail to parse (and 
if we are raising errors, we shouldn't need to touch the null bitmap at all) - 
that should presumably be faster in the common case.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to