mkleen commented on code in PR #8963:
URL: https://github.com/apache/arrow-rs/pull/8963#discussion_r2654841285
##########
arrow-select/src/zip.rs:
##########
@@ -657,6 +665,181 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray) -> BooleanBuffer {
}
}
+struct ByteViewScalarImpl<T: ByteViewType> {
+ truthy: Option<GenericByteViewArray<T>>,
+ falsy: Option<GenericByteViewArray<T>>,
+ phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+ fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+ Self {
+ truthy: Self::get_value_from_scalar(truthy),
+ falsy: Self::get_value_from_scalar(falsy),
+ phantom: PhantomData,
+ }
+ }
+
+ fn get_value_from_scalar(scalar: &dyn Array) ->
Option<GenericByteViewArray<T>> {
+ if scalar.is_null(0) {
+ None
+ } else {
+ Some(scalar.as_byte_view().clone())
+ }
+ }
+
+ fn get_scalar_buffers_and_nulls_for_all_values_null(
+ len: usize,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let mut mutable = MutableBuffer::with_capacity(0);
+ mutable.repeat_slice_n_times((0u128).to_byte_slice(), len);
+
+ (mutable.into(), vec![], Some(NullBuffer::new_null(len)))
+ }
+
+ fn get_scalar_buffers_and_nulls_for_single_non_nullable(
+ predicate: BooleanBuffer,
+ value: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let number_of_true = predicate.count_set_bits();
+ let number_of_values = predicate.len();
+
+ // Fast path for all nulls
+ if number_of_true == 0 {
+ // All values are null
+ return
Self::get_scalar_buffers_and_nulls_for_all_values_null(number_of_values);
+ }
+ let view = value.views()[0].to_byte_slice();
+ let mut bytes = MutableBuffer::with_capacity(0);
+ bytes.repeat_slice_n_times(view, number_of_values);
+
+ let bytes = Buffer::from(bytes);
+
+ // If a value is true we need the TRUTHY and the null buffer will have 1 (meaning not null)
+ // If a value is false we need the FALSY and the null buffer will have 0 (meaning null)
+ let nulls = NullBuffer::new(predicate);
+ (bytes.into(), value.data_buffers().into(), Some(nulls))
+ }
+
+ fn get_scalar_buffers_and_nulls_non_nullable(
+ predicate: BooleanBuffer,
+ truthy: &GenericByteViewArray<T>,
+ falsy: &GenericByteViewArray<T>,
+ ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+ let true_count = predicate.count_set_bits();
+ let view_truthy = truthy.views()[0].to_byte_slice();
+ let mut buffers: Vec<Buffer> = truthy.data_buffers().to_vec();
+
+ // if falsy has non-inlined values in the buffer,
+ // include the buffers and recalculate the view,
+ // otherwise, we simply use the view.
+ let view_falsy = if falsy.total_buffer_bytes_used() > 0 {
+ let byte_view_falsy = ByteView::from(falsy.views()[0]);
+ let new_index_falsy_buffers = buffers.len() as u32;
+ buffers.extend(falsy.data_buffers().to_vec());
+ let byte_view_falsy =
byte_view_falsy.with_buffer_index(new_index_falsy_buffers);
+ byte_view_falsy.as_u128()
+ } else {
+ falsy.views()[0]
+ };
+
+ let total_number_of_bytes = true_count * view_truthy.len()
+ + (predicate.len() - true_count) *
view_falsy.to_byte_slice().len();
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]