edponce commented on a change in pull request #11023:
URL: https://github.com/apache/arrow/pull/11023#discussion_r741377335



##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -2513,6 +2878,135 @@ void AddSplit(FunctionRegistry* registry) {
 #endif
 }
 
+/// A ScalarFunction that promotes integer arguments to Int64.
+struct ScalarCTypeToInt64Function : public ScalarFunction {
+  using ScalarFunction::ScalarFunction;
+
+  Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const 
override {
+    RETURN_NOT_OK(CheckArity(*values));
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+    EnsureDictionaryDecoded(values);
+
+    for (auto& descr : *values) {
+      if (is_integer(descr.type->id())) {
+        descr.type = int64();
+      }
+    }
+
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+template <typename Type1, typename Type2>
+struct StringRepeatTransform : public StringBinaryTransformBase<Type1, Type2> {
+  using ArrayType1 = typename TypeTraits<Type1>::ArrayType;
+  using ArrayType2 = typename TypeTraits<Type2>::ArrayType;
+
+  Result<int64_t> MaxCodeunits(const int64_t input1_ncodeunits,
+                               const int64_t num_repeats) override {
+    ARROW_RETURN_NOT_OK(ValidateRepeatCount(num_repeats));
+    return input1_ncodeunits * num_repeats;
+  }
+
+  Result<int64_t> MaxCodeunits(const int64_t input1_ncodeunits,
+                               const ArrayType2& input2) override {
+    int64_t total_num_repeats = 0;
+    for (int64_t i = 0; i < input2.length(); ++i) {
+      auto num_repeats = input2.GetView(i);
+      ARROW_RETURN_NOT_OK(ValidateRepeatCount(num_repeats));
+      total_num_repeats += num_repeats;
+    }
+    return input1_ncodeunits * total_num_repeats;
+  }
+
+  Result<int64_t> MaxCodeunits(const ArrayType1& input1,
+                               const int64_t num_repeats) override {
+    ARROW_RETURN_NOT_OK(ValidateRepeatCount(num_repeats));
+    return input1.total_values_length() * num_repeats;
+  }
+
+  Result<int64_t> MaxCodeunits(const ArrayType1& input1,
+                               const ArrayType2& input2) override {
+    int64_t total_codeunits = 0;
+    for (int64_t i = 0; i < input2.length(); ++i) {
+      auto num_repeats = input2.GetView(i);
+      ARROW_RETURN_NOT_OK(ValidateRepeatCount(num_repeats));
+      total_codeunits += input1.GetView(i).length() * num_repeats;
+    }
+    return total_codeunits;
+  }
+
+  static Result<int64_t> TransformSimpleLoop(const uint8_t* input,
+                                             const int64_t 
input_string_ncodeunits,
+                                             const int64_t num_repeats, 
uint8_t* output) {
+    uint8_t* output_start = output;
+    for (int64_t i = 0; i < num_repeats; ++i) {
+      std::memcpy(output, input, input_string_ncodeunits);
+      output += input_string_ncodeunits;
+    }
+    return output - output_start;
+  }
+
+  static Result<int64_t> TransformDoublingString(const uint8_t* input,
+                                                 const int64_t 
input_string_ncodeunits,
+                                                 const int64_t num_repeats,
+                                                 uint8_t* output) {
+    uint8_t* output_start = output;
+    // Repeated doubling of string
+    std::memcpy(output, input, input_string_ncodeunits);
+    output += input_string_ncodeunits;
+    int64_t i = 1;
+    for (int64_t ilen = input_string_ncodeunits; i <= (num_repeats / 2);
+         i *= 2, ilen *= 2) {
+      std::memcpy(output, output_start, ilen);
+      output += ilen;
+    }
+
+    // Epilogue remainder
+    int64_t rem = (num_repeats ^ i) * input_string_ncodeunits;

Review comment:
       Not really; `xor` is just representing `mod 2`, but in this case 
subtraction is also valid.
   Changed it to subtraction and renamed the variable to `irep` for improved 
readability.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to