jhorstmann commented on a change in pull request #8280:
URL: https://github.com/apache/arrow/pull/8280#discussion_r495432114
##########
File path: rust/arrow/src/compute/kernels/comparison.rs
##########
@@ -555,11 +555,159 @@ where
compare_op_scalar!(left, right, |a, b| a >= b)
}
+/// Checks if a `GenericListArray` contains a value in the `PrimitiveArray`
+pub fn contains<T, OffsetSize>(
+ left: &PrimitiveArray<T>,
+ right: &GenericListArray<OffsetSize>,
+) -> Result<BooleanArray>
+where
+ T: ArrowNumericType,
+ OffsetSize: OffsetSizeTrait,
+{
+ if left.len() != right.len() {
+ return Err(ArrowError::ComputeError(
+ "Cannot perform comparison operation on arrays of different length"
+ .to_string(),
+ ));
+ }
+
+ let not_both_null_bit_buffer =
+ match compare_option_bitmap(left.data_ref(), right.data_ref(),
left.len())? {
+ Some(buff) => buff,
+ None => new_all_set_buffer(left.len()),
+ };
+ let not_both_null_bitmap = not_both_null_bit_buffer.data();
+
+ let left_data = left.data();
+ let left_null_bitmap = match left_data.null_bitmap() {
+ Some(bitmap) => bitmap.clone().into_buffer(),
+ _ => new_all_set_buffer(left.len()),
+ };
+ let left_null_bitmap = left_null_bitmap.data();
+
+ let mut result = BooleanBufferBuilder::new(left.len());
+
+ for i in 0..left.len() {
+ let mut is_in = false;
+
+ // contains(null, null) = false
+ if bit_util::get_bit(not_both_null_bitmap, i) {
+ let list = right.value(i);
+
+ // contains(null, [null]) = true
+ if !bit_util::get_bit(left_null_bitmap, i) {
+ if list.null_count() > 0 {
+ is_in = true;
+ }
+ } else {
+ let list =
list.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+ for j in 0..list.len() {
+ if list.is_valid(j) && (left.value(i) == list.value(j)) {
+ is_in = true;
+ }
+ }
+ }
+ }
+ result.append(is_in)?;
+ }
+
+ let data = ArrayData::new(
+ DataType::Boolean,
+ left.len(),
+ None,
+ None,
+ left.offset(),
+ vec![result.finish()],
+ vec![],
+ );
+ Ok(PrimitiveArray::<BooleanType>::from(Arc::new(data)))
+}
+
+/// Checks if a `GenericListArray` contains a value in the `GenericStringArray`
+pub fn contains_utf8<OffsetSize>(
+ left: &GenericStringArray<OffsetSize>,
+ right: &ListArray,
+) -> Result<BooleanArray>
+where
+ OffsetSize: OffsetSizeTrait,
+{
+ if left.len() != right.len() {
+ return Err(ArrowError::ComputeError(
+ "Cannot perform comparison operation on arrays of different length"
+ .to_string(),
+ ));
+ }
+
+ let not_both_null_bit_buffer =
+ match compare_option_bitmap(left.data_ref(), right.data_ref(),
left.len())? {
+ Some(buff) => buff,
+ None => new_all_set_buffer(left.len()),
Review comment:
I think this creates a buffer that is larger than needed, since it is
a bitmap storing 8 values per byte. The correct size in bytes would be `ceil(left.len(), 8)`.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]