pitrou commented on a change in pull request #11082:
URL: https://github.com/apache/arrow/pull/11082#discussion_r702862294
##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -420,6 +426,205 @@ struct StringTransformExecWithState
}
};
+/// Kernel exec generator for binary string transforms.
+/// The first parameter is expected to always be a string type while the
second parameter
+/// is generic. It supports executions of the form:
+/// * Scalar, Scalar
+/// * Array, Scalar - scalar is broadcasted and paired with all values of
array
+/// * Array, Array - arrays are processed element-wise
+/// * Scalar, Array - not supported by default
+// TODO(edponce): For when second parameter is an array, need to specify a
corresponding
+// iterator/visitor.
+template <typename Type1, typename Type2, typename StringTransform>
+struct StringBinaryTransformExecBase {
+ using offset_type = typename Type1::offset_type;
+ using ArrayType1 = typename TypeTraits<Type1>::ArrayType;
+ using ArrayType2 = typename TypeTraits<Type2>::ArrayType;
+
+ static Status Execute(KernelContext* ctx, StringTransform* transform,
+ const ExecBatch& batch, Datum* out) {
+ if (batch.num_values() != 2) {
+ return Status::Invalid("Invalid arity for binary string transform");
+ }
+
+ if (batch[0].is_array()) {
+ if (batch[1].is_array()) {
+ return ExecArrayArray(ctx, transform, batch[0].array(),
batch[1].array(), out);
+ } else if (batch[1].is_scalar()) {
+ return ExecArrayScalar(ctx, transform, batch[0].array(),
batch[1].scalar(), out);
+ }
+ } else if (batch[0].is_scalar()) {
+ if (batch[1].is_array()) {
+ return ExecScalarArray(ctx, transform, batch[0].scalar(),
batch[1].array(), out);
+ } else if (batch[1].is_scalar()) {
+ return ExecScalarScalar(ctx, transform, batch[0].scalar(),
batch[1].scalar(),
+ out);
+ }
+ }
+ return Status::Invalid("Invalid ExecBatch kind for binary string
transform");
+ }
+
+ private:
+ static Status ExecScalarScalar(KernelContext* ctx, StringTransform*
transform,
+ const std::shared_ptr<Scalar>& scalar1,
+ const std::shared_ptr<Scalar>& scalar2,
Datum* out) {
+ const auto& input1 = checked_cast<const BaseBinaryScalar&>(*scalar1);
+ // TODO(edponce): How to validate inputs? For some kernels, returning null
is ok
+ // (ascii_lower) but others not necessarily (concatenate)
+ if (!input1.is_valid) {
+ return Status::OK();
+ }
+
+ auto input_ncodeunits = input1.value->size();
+ auto input_nstrings = 1;
+ auto output_ncodeunits_max =
+ transform->MaxCodeunits(input_nstrings, input_ncodeunits);
Review comment:
Weirdly, it seems the result size would only depend on the _first_ input?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]