This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 37a8bf04bc GH-39049: [C++] Use Cast() instead of CastTo() for 
Dictionary Scalar in test (#39362)
37a8bf04bc is described below

commit 37a8bf04bc713858a5b247d4424c1e8505e61947
Author: Hyunseok Seo <[email protected]>
AuthorDate: Sun Jan 7 08:02:08 2024 +0900

    GH-39049: [C++] Use Cast() instead of CastTo() for Dictionary Scalar in 
test (#39362)
    
    
    
    ### Rationale for this change
    
    Remove legacy code
    
    ### What changes are included in this PR?
    
    Replace the legacy scalar CastTo implementation for Dictionary Scalar in 
test.
    
    ### Are these changes tested?
    
    Yes. It is passed by existing test cases.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #39049
    
    Authored-by: Hyunseok Seo <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 .../compute/kernels/scalar_cast_dictionary.cc      | 30 +++++++++++++++-------
 cpp/src/arrow/dataset/partition_test.cc            |  4 +--
 cpp/src/arrow/scalar_test.cc                       |  3 ++-
 3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
index 13c0d599bf..f13aa26d96 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc
@@ -36,14 +36,22 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& 
batch, ExecResult* o
   const CastOptions& options = CastState::Get(ctx);
   const auto& out_type = checked_cast<const DictionaryType&>(*out->type());
 
+  std::shared_ptr<ArrayData> in_array = batch[0].array.ToArrayData();
+
   // if out type is same as in type, return input
   if (out_type.Equals(*batch[0].type())) {
     /// XXX: This is the wrong place to do a zero-copy optimization
-    out->value = batch[0].array.ToArrayData();
+    out->value = in_array;
     return Status::OK();
   }
 
-  std::shared_ptr<ArrayData> in_array = batch[0].array.ToArrayData();
+  // If the input type is STRING, it is first encoded as a dictionary to 
facilitate
+  // processing. This approach allows the subsequent code to uniformly handle 
STRING
+  // inputs as if they were originally provided in dictionary format. Encoding 
as a
+  // dictionary helps in reusing the same logic for dictionary operations.
+  if (batch[0].type()->id() == Type::STRING) {
+    in_array = DictionaryEncode(in_array)->array();
+  }
   const auto& in_type = checked_cast<const DictionaryType&>(*in_array->type);
 
   ArrayData* out_array = out->array_data().get();
@@ -77,17 +85,21 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& 
batch, ExecResult* o
   return Status::OK();
 }
 
-std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
-  auto func = std::make_shared<CastFunction>("cast_dictionary", 
Type::DICTIONARY);
-
-  AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
-  ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, 
CastToDictionary);
+template <typename SrcType>
+void AddDictionaryCast(CastFunction* func) {
+  ScalarKernel kernel({InputType(SrcType::type_id)}, kOutputTargetType, 
CastToDictionary);
   kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
   kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
+}
 
-  DCHECK_OK(func->AddKernel(Type::DICTIONARY, std::move(kernel)));
+std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
+  auto cast_dict = std::make_shared<CastFunction>("cast_dictionary", 
Type::DICTIONARY);
+  AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dict.get());
+  AddDictionaryCast<DictionaryType>(cast_dict.get());
+  AddDictionaryCast<StringType>(cast_dict.get());
 
-  return {func};
+  return {cast_dict};
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/dataset/partition_test.cc 
b/cpp/src/arrow/dataset/partition_test.cc
index 7ec96929a9..1b71be15d1 100644
--- a/cpp/src/arrow/dataset/partition_test.cc
+++ b/cpp/src/arrow/dataset/partition_test.cc
@@ -316,7 +316,7 @@ TEST_F(TestPartitioning, 
DirectoryPartitioningFormatDictionary) {
                                                           
ArrayVector{dictionary});
   written_schema_ = partitioning_->schema();
 
-  ASSERT_OK_AND_ASSIGN(auto dict_hello, 
MakeScalar("hello")->CastTo(DictStr("")->type()));
+  ASSERT_OK_AND_ASSIGN(auto dict_hello, Cast(MakeScalar("hello"), 
DictStr("")->type()));
   AssertFormat(equal(field_ref("alpha"), literal(dict_hello)), "hello");
 }
 
@@ -329,7 +329,7 @@ TEST_F(TestPartitioning, 
DirectoryPartitioningFormatDictionaryCustomIndex) {
       schema({field("alpha", dict_type)}), ArrayVector{dictionary});
   written_schema_ = partitioning_->schema();
 
-  ASSERT_OK_AND_ASSIGN(auto dict_hello, 
MakeScalar("hello")->CastTo(dict_type));
+  ASSERT_OK_AND_ASSIGN(auto dict_hello, Cast(MakeScalar("hello"), dict_type));
   AssertFormat(equal(field_ref("alpha"), literal(dict_hello)), "hello");
 }
 
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index e8b8784e7a..d9fb3feaee 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -1490,7 +1490,8 @@ TEST(TestDictionaryScalar, Cast) {
       auto alpha =
           dict->IsValid(i) ? MakeScalar(dict->GetString(i)) : 
MakeNullScalar(utf8());
       // Cast string to dict(..., string)
-      ASSERT_OK_AND_ASSIGN(auto cast_alpha, alpha->CastTo(ty));
+      ASSERT_OK_AND_ASSIGN(auto cast_alpha_datum, Cast(alpha, ty));
+      const auto& cast_alpha = cast_alpha_datum.scalar();
       ASSERT_OK(cast_alpha->ValidateFull());
       ASSERT_OK_AND_ASSIGN(
           auto roundtripped_alpha,

Reply via email to