Veeupup commented on code in PR #8081:
URL: https://github.com/apache/arrow-datafusion/pull/8081#discussion_r1386767549
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1821,88 +1822,95 @@ pub fn array_has_all(args: &[ArrayRef]) ->
Result<ArrayRef> {
Ok(Arc::new(boolean_builder.finish()))
}
-macro_rules! array_intersect_normal {
- ($FIRST_ARRAY:expr, $SECOND_ARRAY:expr, $DATA_TYPE:expr,
$ARRAY_TYPE:ident, $BUILDER:ident) => {{
- let mut offsets: Vec<i32> = vec![0];
- let mut values =
- downcast_arg!(new_empty_array(&$DATA_TYPE), $ARRAY_TYPE).clone();
-
- for (first_arr, second_arr) in
$FIRST_ARRAY.iter().zip($SECOND_ARRAY.iter()) {
- let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
- DataFusionError::Internal(format!("offsets should not be
empty"))
- })?;
- match (first_arr, second_arr) {
- (Some(first_arr), Some(second_arr)) => {
- let first_arr = downcast_arg!(first_arr, $ARRAY_TYPE);
- // TODO(veeupup): maybe use stack-implemented map to avoid
heap memory allocation
- let first_set =
first_arr.iter().dedup().flatten().collect::<HashSet<_>>();
- let second_arr = downcast_arg!(second_arr, $ARRAY_TYPE);
-
- let mut builder = $BUILDER::new();
- for elem in second_arr.iter().dedup().flatten() {
- if first_set.contains(&elem) {
- builder.append_value(elem);
- }
- }
-
- let arr = builder.finish();
- values = downcast_arg!(
- compute::concat(&[
- &values,
- &arr
- ])?
- .clone(),
- $ARRAY_TYPE
- )
- .clone();
- offsets.push(last_offset + arr.len() as i32);
- },
- _ => {
- offsets.push(last_offset);
- }
- }
- }
- let field = Arc::new(Field::new("item", $DATA_TYPE, true));
-
- Ok(Arc::new(ListArray::try_new(
- field,
- OffsetBuffer::new(offsets.into()),
- Arc::new(values),
- None,
- )?))
-
- }};
-}
-
/// array_intersect SQL function
pub fn array_intersect(args: &[ArrayRef]) -> Result<ArrayRef> {
assert_eq!(args.len(), 2);
let first_array = as_list_array(&args[0])?;
let second_array = as_list_array(&args[1])?;
- match (first_array.value_type(), second_array.value_type()) {
+ let dt = match (first_array.value_type(), second_array.value_type()) {
Review Comment:
sure! I'll change it in later changes
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]