lidavidm commented on a change in pull request #12080:
URL: https://github.com/apache/arrow/pull/12080#discussion_r813887878
##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -772,14 +769,27 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
return Status::OK();
}
- Status SetupPreallocation(int64_t total_length) {
+ Status SetupPreallocation(int64_t total_length, const std::vector<Datum>& args) {
output_num_buffers_ =
static_cast<int>(output_descr_.type->layout().buffers.size());
-
- // Decide if we need to preallocate memory for this kernel
- validity_preallocated_ =
- (kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE &&
- kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL &&
- output_descr_.type->id() != Type::NA);
+ auto out_type_id = output_descr_.type->id();
+ // Default to no validity pre-allocation for following cases:
+ // - Output Array is NullArray
+ // - kernel_->null_handling is COMPUTED_NO_PREALLOCATE or OUTPUT_NOT_NULL
+ validity_preallocated_ = false;
+ if (out_type_id != Type::NA) {
+ if (kernel_->null_handling == NullHandling::COMPUTED_PREALLOCATE) {
+ // Override the flag if kernel asks for pre-allocation
+ validity_preallocated_ = true;
+ } else if (kernel_->null_handling == NullHandling::INTERSECTION) {
+ bool are_all_inputs_valid = true;
+ for (auto& arg : args) {
+ auto null_gen = NullGeneralization::Get(arg) == NullGeneralization::ALL_VALID;
+ are_all_inputs_valid = are_all_inputs_valid && null_gen;
+ }
+ validity_preallocated_ =
+ !(are_all_inputs_valid || kernel_->can_write_into_slices);
Review comment:
Why does `can_write_into_slices` disable preallocation?
##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -792,7 +802,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
preallocate_contiguous_ =
(exec_context()->preallocate_contiguous() &&
kernel_->can_write_into_slices &&
validity_preallocated_ && !is_nested(output_descr_.type->id()) &&
- !is_dictionary(output_descr_.type->id()) &&
+ !is_dictionary(out_type_id) &&
Review comment:
nit: if we're going to update this here, also update the usage of
`output_descr_.type->id()` on the previous line?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]