tustvold commented on code in PR #8081:
URL: https://github.com/apache/arrow-datafusion/pull/8081#discussion_r1386758070
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1821,88 +1822,95 @@ pub fn array_has_all(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(boolean_builder.finish()))
}
-macro_rules! array_intersect_normal {
- ($FIRST_ARRAY:expr, $SECOND_ARRAY:expr, $DATA_TYPE:expr,
$ARRAY_TYPE:ident, $BUILDER:ident) => {{
- let mut offsets: Vec<i32> = vec![0];
- let mut values =
- downcast_arg!(new_empty_array(&$DATA_TYPE), $ARRAY_TYPE).clone();
-
- for (first_arr, second_arr) in
$FIRST_ARRAY.iter().zip($SECOND_ARRAY.iter()) {
- let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
- DataFusionError::Internal(format!("offsets should not be
empty"))
- })?;
- match (first_arr, second_arr) {
- (Some(first_arr), Some(second_arr)) => {
- let first_arr = downcast_arg!(first_arr, $ARRAY_TYPE);
- // TODO(veeupup): maybe use stack-implemented map to avoid
heap memory allocation
- let first_set =
first_arr.iter().dedup().flatten().collect::<HashSet<_>>();
- let second_arr = downcast_arg!(second_arr, $ARRAY_TYPE);
-
- let mut builder = $BUILDER::new();
- for elem in second_arr.iter().dedup().flatten() {
- if first_set.contains(&elem) {
- builder.append_value(elem);
- }
- }
-
- let arr = builder.finish();
- values = downcast_arg!(
- compute::concat(&[
- &values,
- &arr
- ])?
- .clone(),
- $ARRAY_TYPE
- )
- .clone();
- offsets.push(last_offset + arr.len() as i32);
- },
- _ => {
- offsets.push(last_offset);
- }
- }
- }
- let field = Arc::new(Field::new("item", $DATA_TYPE, true));
-
- Ok(Arc::new(ListArray::try_new(
- field,
- OffsetBuffer::new(offsets.into()),
- Arc::new(values),
- None,
- )?))
-
- }};
-}
-
/// array_intersect SQL function
pub fn array_intersect(args: &[ArrayRef]) -> Result<ArrayRef> {
assert_eq!(args.len(), 2);
let first_array = as_list_array(&args[0])?;
let second_array = as_list_array(&args[1])?;
- match (first_array.value_type(), second_array.value_type()) {
+ let dt = match (first_array.value_type(), second_array.value_type()) {
Review Comment:
Do we even need this match block now, could we just verify the two arrays
have the same type?
Edit: You could even omit this check as RowConverter will do it for you, but
checking might yield a more informative error should type coercion fail for
some reason.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]