lidavidm commented on a change in pull request #11230:
URL: https://github.com/apache/arrow/pull/11230#discussion_r721648082



##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -1921,6 +1963,24 @@ TEST(Cast, BinaryOrStringToBinary) {
                 MaskArrayWithNullsAt(InvalidUtf8(to_type), {4}));
     }
   }
+
+  auto from_type = fixed_size_binary(3);
+  auto invalid_utf8 = FixedSizeInvalidUtf8(from_type);
+  CheckCast(invalid_utf8, invalid_utf8);
+  CheckCastFails(invalid_utf8, CastOptions::Safe(fixed_size_binary(5)));

Review comment:
       That's covered by the CheckCast above: casting fixed_size_binary to 
fixed_size_binary of the same width succeeds. Only casts between differing 
widths fail.

##########
File path: cpp/src/arrow/compute/kernels/scalar_string_test.cc
##########
@@ -378,6 +378,199 @@ TYPED_TEST(TestBinaryKernels, BinaryJoinElementWise) {
   ASSERT_RAISES(Invalid, CallFunction("binary_join_element_wise", {}, 
&options));
 }
 
+class TestFixedSizeBinaryKernels : public ::testing::Test {
+ protected:
+  void CheckUnary(std::string func_name, std::string json_input,
+                  std::shared_ptr<DataType> out_ty, std::string json_expected,
+                  const FunctionOptions* options = nullptr) {
+    CheckScalarUnary(func_name, type(), json_input, out_ty, json_expected, 
options);
+    // Ensure the equivalent binary kernel does the same thing
+    CheckScalarUnary(func_name, binary(), json_input,
+                     out_ty->id() == Type::FIXED_SIZE_BINARY ? binary() : 
out_ty,
+                     json_expected, options);
+  }
+
+  std::shared_ptr<DataType> type() const { return fixed_size_binary(6); }
+  std::shared_ptr<DataType> offset_type() const { return int32(); }
+};
+
+TEST_F(TestFixedSizeBinaryKernels, BinaryLength) {
+  CheckUnary("binary_length", R"(["aaaaaa", null, "áéí"])", offset_type(),
+             "[6, null, 6]");
+}
+
+TEST_F(TestFixedSizeBinaryKernels, BinaryReplaceSlice) {
+  ReplaceSliceOptions options{0, 1, "XX"};
+  CheckUnary("binary_replace_slice", "[]", fixed_size_binary(7), "[]", 
&options);
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(7),
+             R"([null, "XXbcdef"])", &options);
+
+  ReplaceSliceOptions options_shrink{0, 2, ""};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(4),
+             R"([null, "cdef"])", &options_shrink);
+
+  ReplaceSliceOptions options_whole{0, 6, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(2),
+             R"([null, "XX"])", &options_whole);
+
+  ReplaceSliceOptions options_middle{2, 4, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(6),
+             R"([null, "abXXef"])", &options_middle);
+
+  ReplaceSliceOptions options_neg_start{-3, -2, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(7),
+             R"([null, "abcXXef"])", &options_neg_start);
+
+  ReplaceSliceOptions options_neg_end{2, -2, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(6),
+             R"([null, "abXXef"])", &options_neg_end);
+
+  ReplaceSliceOptions options_neg_pos{-1, 2, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(8),
+             R"([null, "abcdeXXf"])", &options_neg_pos);
+
+  // Effectively the same as [2, 2)
+  ReplaceSliceOptions options_flip{2, 0, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(8),
+             R"([null, "abXXcdef"])", &options_flip);
+
+  // Effectively the same as [-3, -3)
+  ReplaceSliceOptions options_neg_flip{-3, -5, "XX"};
+  CheckUnary("binary_replace_slice", R"([null, "abcdef"])", 
fixed_size_binary(8),
+             R"([null, "abcXXdef"])", &options_neg_flip);
+}
+
+TEST_F(TestFixedSizeBinaryKernels, CountSubstring) {
+  MatchSubstringOptions options{"aba"};
+  CheckUnary("count_substring", "[]", offset_type(), "[]", &options);
+  CheckUnary(
+      "count_substring",
+      R"(["      ", null, "  ab  ", " aba  ", "baba  ", "ababa ", "abaaba", 
"ABAABA"])",
+      offset_type(), "[0, null, 0, 1, 1, 1, 2, 0]", &options);
+
+  MatchSubstringOptions options_empty{""};
+  CheckUnary("count_substring", R"(["      ", null, "abc   "])", offset_type(),
+             "[7, null, 7]", &options_empty);
+
+  MatchSubstringOptions options_repeated{"aaa"};
+  CheckUnary("count_substring", R"(["      ", "aaaa  ", "aaaaa ", "aaaaaa", 
"aaáaa"])",
+             offset_type(), "[0, 1, 1, 2, 0]", &options_repeated);
+}
+
+#ifdef ARROW_WITH_RE2
+TEST_F(TestFixedSizeBinaryKernels, CountSubstringRegex) {
+  MatchSubstringOptions options{"aba"};
+  CheckUnary("count_substring_regex", "[]", offset_type(), "[]", &options);
+  CheckUnary(
+      "count_substring",

Review comment:
       Whoops, fixed.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to