This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d1aa97a  ARROW-8025: [C++] Implement cast from String to Binary
d1aa97a is described below

commit d1aa97a5cfed6062ade762eab491acb098a2b48b
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Tue Jun 23 17:54:40 2020 -0500

    ARROW-8025: [C++] Implement cast from String to Binary
    
    I'd defer the following conversions to follow-up PRs, since these require
    custom CastFunctors:
    - String/Binary -> FixedSizeBinary
    - FixedSizeBinary -> String/Binary
    
    Closes #7470 from kszucs/ARROW-8025
    
    Authored-by: Krisztián Szűcs <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 .../arrow/compute/kernels/scalar_cast_string.cc    |  3 +++
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc  | 27 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index b5e9ea9..9b6d404 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -145,10 +145,13 @@ void AddNumberToStringCasts(std::shared_ptr<DataType> 
out_ty, CastFunction* func
 std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts() {
   auto cast_binary = std::make_shared<CastFunction>("cast_binary", 
Type::BINARY);
   AddCommonCasts(Type::BINARY, binary(), cast_binary.get());
+  AddZeroCopyCast(Type::STRING, {utf8()}, binary(), cast_binary.get());
 
   auto cast_large_binary =
       std::make_shared<CastFunction>("cast_large_binary", Type::LARGE_BINARY);
   AddCommonCasts(Type::LARGE_BINARY, large_binary(), cast_large_binary.get());
+  AddZeroCopyCast(Type::LARGE_STRING, {large_utf8()}, large_binary(),
+                  cast_large_binary.get());
 
   auto cast_fsb =
       std::make_shared<CastFunction>("cast_fixed_size_binary", 
Type::FIXED_SIZE_BINARY);
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 8fd33d3..565f90f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -229,6 +229,27 @@ class TestCast : public TestBase {
                                     /*check_scalar=*/false);
   }
 
+  template <typename SourceType, typename DestType>
+  void TestCastStringToBinary() {
+    CastOptions options;
+    auto src_type = TypeTraits<SourceType>::type_singleton();
+    auto dest_type = TypeTraits<DestType>::type_singleton();
+
+    // All valid except the last one
+    std::vector<bool> all = {1, 1, 1, 1, 1};
+    std::vector<bool> valid = {1, 1, 1, 1, 0};
+    std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", 
kInvalidUtf8};
+
+    std::shared_ptr<Array> array;
+
+    // Should accept when invalid but null.
+    ArrayFromVector<SourceType, std::string>(src_type, valid, strings, &array);
+    CheckZeroCopy(*array, dest_type);
+
+    CheckCase<SourceType, std::string, DestType, std::string>(
+        src_type, strings, all, dest_type, strings, options);
+  }
+
   template <typename DestType>
   void TestCastNumberToString() {
     auto dest_type = TypeTraits<DestType>::type_singleton();
@@ -1444,6 +1465,12 @@ TEST_F(TestCast, LargeBinaryToLargeString) {
   TestCastBinaryToString<LargeBinaryType, LargeStringType>();
 }
 
+TEST_F(TestCast, StringToBinary) { TestCastStringToBinary<StringType, 
BinaryType>(); }
+
+TEST_F(TestCast, LargeStringToLargeBinary) {
+  TestCastStringToBinary<LargeStringType, LargeBinaryType>();
+}
+
 TEST_F(TestCast, NumberToString) { TestCastNumberToString<StringType>(); }
 
 TEST_F(TestCast, NumberToLargeString) { 
TestCastNumberToString<LargeStringType>(); }

Reply via email to