jayzhan211 commented on code in PR #9710:
URL: https://github.com/apache/arrow-datafusion/pull/9710#discussion_r1533518043


##########
datafusion/functions-array/src/set_ops.rs:
##########
@@ -273,55 +284,88 @@ fn generic_set_lists<OffsetSize: OffsetSizeTrait>(
 
     let mut offsets = vec![OffsetSize::usize_as(0)];
     let mut new_arrays = vec![];
+    let mut nulls = vec![];
 
     let converter = RowConverter::new(vec![SortField::new(dt)])?;
     for (first_arr, second_arr) in l.iter().zip(r.iter()) {
-        if let (Some(first_arr), Some(second_arr)) = (first_arr, second_arr) {
-            let l_values = converter.convert_columns(&[first_arr])?;
-            let r_values = converter.convert_columns(&[second_arr])?;
-
-            let l_iter = l_values.iter().sorted().dedup();
-            let values_set: HashSet<_> = l_iter.clone().collect();
-            let mut rows = if set_op == SetOp::Union {
-                l_iter.collect::<Vec<_>>()
-            } else {
-                vec![]
-            };
-            for r_val in r_values.iter().sorted().dedup() {
-                match set_op {
-                    SetOp::Union => {
-                        if !values_set.contains(&r_val) {
-                            rows.push(r_val);
-                        }
-                    }
-                    SetOp::Intersect => {
-                        if values_set.contains(&r_val) {
-                            rows.push(r_val);
-                        }
-                    }
+        let last_offset = match offsets.last().copied() {
+            Some(offset) => offset,
+            None => return internal_err!("offsets should not be empty"),
+        };
+        let (l_values, r_values) = match (first_arr, second_arr) {
+            (Some(_), None) if set_op == SetOp::Intersect => {
+                offsets.push(last_offset);
+                nulls.push(false);
+                continue;
+            }
+            (None, Some(_)) if matches!(set_op, SetOp::Intersect | 
SetOp::Except) => {
+                offsets.push(last_offset);
+                nulls.push(false);
+                continue;
+            }
+            (None, None) => {
+                offsets.push(last_offset);
+                nulls.push(false);
+                continue;
+            }
+            (first_arr, second_arr) => {
+                let first_arr = 
first_arr.unwrap_or(new_empty_array(&l.value_type()));
+                let second_arr = 
second_arr.unwrap_or(new_empty_array(&r.value_type()));
+                let l_values = converter.convert_columns(&[first_arr])?;
+                let r_values = converter.convert_columns(&[second_arr])?;
+                // swap l_values and r_values if set_op is Except, because the 
values
+                // in the second array should be removed from the first array
+                if set_op == SetOp::Except {
+                    (r_values, l_values)
+                } else {
+                    (l_values, r_values)
                 }
             }
+        };
 
-            let last_offset = match offsets.last().copied() {
-                Some(offset) => offset,
-                None => return internal_err!("offsets should not be empty"),
-            };
-            offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
-            let arrays = converter.convert_rows(rows)?;
-            let array = match arrays.first() {
-                Some(array) => array.clone(),
-                None => {
-                    return internal_err!("{set_op}: failed to get array from 
rows");
+        let l_iter = l_values.iter().sorted().dedup();
+        let values_set: HashSet<_> = l_iter.clone().collect();
+        let mut rows = if set_op == SetOp::Union {
+            l_iter.collect::<Vec<_>>()
+        } else {
+            vec![]
+        };
+        for r_val in r_values.iter().sorted().dedup() {
+            match set_op {
+                SetOp::Union | SetOp::Except => {
+                    if !values_set.contains(&r_val) {
+                        rows.push(r_val);
+                    }
                 }
-            };
-            new_arrays.push(array);
+                SetOp::Intersect => {
+                    if values_set.contains(&r_val) {
+                        rows.push(r_val);
+                    }
+                }
+            }
         }
+
+        offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
+        let arrays = converter.convert_rows(rows)?;
+        let array = match arrays.first() {

Review Comment:
   I recently learn `swap_remove`, I think we can apply here without clone. 
`arrays.swap_remove(0)`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to