This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d1aa97a ARROW-8025: [C++] Implement cast from String to Binary
d1aa97a is described below
commit d1aa97a5cfed6062ade762eab491acb098a2b48b
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Tue Jun 23 17:54:40 2020 -0500
ARROW-8025: [C++] Implement cast from String to Binary
I'd defer the following conversions to follow-up PRs, as these require custom
CastFunctors:
- String/Binary -> FixedSizeBinary
- FixedSizeBinary -> String/Binary
Closes #7470 from kszucs/ARROW-8025
Authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
.../arrow/compute/kernels/scalar_cast_string.cc | 3 +++
cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 27 ++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index b5e9ea9..9b6d404 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -145,10 +145,13 @@ void AddNumberToStringCasts(std::shared_ptr<DataType>
out_ty, CastFunction* func
std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts() {
auto cast_binary = std::make_shared<CastFunction>("cast_binary",
Type::BINARY);
AddCommonCasts(Type::BINARY, binary(), cast_binary.get());
+ AddZeroCopyCast(Type::STRING, {utf8()}, binary(), cast_binary.get());
auto cast_large_binary =
std::make_shared<CastFunction>("cast_large_binary", Type::LARGE_BINARY);
AddCommonCasts(Type::LARGE_BINARY, large_binary(), cast_large_binary.get());
+ AddZeroCopyCast(Type::LARGE_STRING, {large_utf8()}, large_binary(),
+ cast_large_binary.get());
auto cast_fsb =
std::make_shared<CastFunction>("cast_fixed_size_binary",
Type::FIXED_SIZE_BINARY);
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 8fd33d3..565f90f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -229,6 +229,27 @@ class TestCast : public TestBase {
/*check_scalar=*/false);
}
+ template <typename SourceType, typename DestType>
+ void TestCastStringToBinary() {
+ CastOptions options;
+ auto src_type = TypeTraits<SourceType>::type_singleton();
+ auto dest_type = TypeTraits<DestType>::type_singleton();
+
+ // All valid except the last one
+ std::vector<bool> all = {1, 1, 1, 1, 1};
+ std::vector<bool> valid = {1, 1, 1, 1, 0};
+ std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "",
kInvalidUtf8};
+
+ std::shared_ptr<Array> array;
+
+ // Should accept when invalid but null.
+ ArrayFromVector<SourceType, std::string>(src_type, valid, strings, &array);
+ CheckZeroCopy(*array, dest_type);
+
+ CheckCase<SourceType, std::string, DestType, std::string>(
+ src_type, strings, all, dest_type, strings, options);
+ }
+
template <typename DestType>
void TestCastNumberToString() {
auto dest_type = TypeTraits<DestType>::type_singleton();
@@ -1444,6 +1465,12 @@ TEST_F(TestCast, LargeBinaryToLargeString) {
TestCastBinaryToString<LargeBinaryType, LargeStringType>();
}
+TEST_F(TestCast, StringToBinary) { TestCastStringToBinary<StringType,
BinaryType>(); }
+
+TEST_F(TestCast, LargeStringToLargeBinary) {
+ TestCastStringToBinary<LargeStringType, LargeBinaryType>();
+}
+
TEST_F(TestCast, NumberToString) { TestCastNumberToString<StringType>(); }
TEST_F(TestCast, NumberToLargeString) {
TestCastNumberToString<LargeStringType>(); }