zagto commented on code in PR #13657:
URL: https://github.com/apache/arrow/pull/13657#discussion_r944951683
##########
cpp/src/arrow/compute/kernels/vector_selection.cc:
##########
@@ -841,6 +907,222 @@ Status PrimitiveFilter(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
return Status::OK();
}
+/// \brief The Filter implementation for primitive (fixed-width) types does not
+/// use the logical Arrow type but rather the physical C type. This way we only
+/// generate one take function for each byte width. We use the same
+/// implementation here for boolean and fixed-byte-size inputs with some
+/// template specialization.
+template <typename ArrowType>
+class RLEPrimitiveFilterImpl {
+ public:
+ using T = typename std::conditional<std::is_same<ArrowType,
BooleanType>::value,
+ uint8_t, typename
ArrowType::c_type>::type;
+
+ RLEPrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter,
+ FilterOptions::NullSelectionBehavior null_selection,
+ ArrayData* out_arr)
+ : values_{values},
+ values_is_valid_(values.child_data[0].buffers[0].data),
+ values_data_(reinterpret_cast<const
T*>(values.child_data[0].buffers[1].data)),
+ filter_{filter},
+ filter_is_valid_(filter.child_data[0].buffers[0].data),
+ filter_data_(filter.child_data[0].buffers[1].data),
+ null_selection_(null_selection),
+ out_logical_length_(out_arr->length) {
+ if (out_arr->child_data[0]->buffers[0] != nullptr) {
+ // May not be allocated if neither filter nor values contains nulls
+ out_is_valid_ = out_arr->child_data[0]->buffers[0]->mutable_data();
+ }
+ assert(out_arr->offset == 0);
+ out_position_ = 0;
+ out_run_length_ = out_arr->GetMutableValues<int64_t>(0, 0);
+ out_data_ =
reinterpret_cast<T*>(out_arr->child_data[0]->buffers[1]->mutable_data());
+ }
+
+ void Exec() {
Review Comment:
The primary reason is that the runs of the filter array do not necessarily
have the same run lengths as the runs of the RLE array we are filtering. This
happens, for example, when the data is filtered based on the value of a
different column.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]