llama90 commented on code in PR #39362:
URL: https://github.com/apache/arrow/pull/39362#discussion_r1442466313
##########
cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc:
##########
@@ -77,17 +85,23 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& batch, ExecResult* o
return Status::OK();
}
-std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
- auto func = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
-
- AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
- ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, CastToDictionary);
+template <typename SrcType>
+void AddDictionaryCast(CastFunction* func) {
+ ScalarKernel kernel;
+ kernel.exec = CastToDictionary;
+ kernel.signature =
+ KernelSignature::Make({InputType(SrcType::type_id)}, kOutputTargetType);
kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
Review Comment:
too.
##########
cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc:
##########
@@ -44,6 +44,14 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& batch, ExecResult* o
}
std::shared_ptr<ArrayData> in_array = batch[0].array.ToArrayData();
+
+ // If the input type is STRING, it is first encoded as a dictionary to facilitate
+ // processing. This approach allows the subsequent code to uniformly handle STRING
+ // inputs as if they were originally provided in dictionary format. Encoding as a
+ // dictionary helps in reusing the same logic for dictionary operations.
+ if (batch[0].type()->id() == Type::STRING) {
+ in_array = DictionaryEncode(batch[0].array.ToArrayData())->array();
Review Comment:
I agree.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]