jayzhan211 commented on code in PR #7897:
URL: https://github.com/apache/arrow-datafusion/pull/7897#discussion_r1386573311


##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1358,6 +1360,94 @@ macro_rules! to_string {
     }};
 }
 
+fn union_generic_lists<OffsetSize: OffsetSizeTrait>(
+    l: &GenericListArray<OffsetSize>,
+    r: &GenericListArray<OffsetSize>,
+) -> Result<GenericListArray<OffsetSize>, DataFusionError> {
+    let converter =
+        
RowConverter::new(vec![SortField::new(l.value_type().clone())]).unwrap();
+    
+    let nulls = NullBuffer::union(l.nulls(), r.nulls());
+    let field = Arc::new(Field::new(
+        "item",
+        l.value_type().to_owned(),
+        l.is_nullable(),
+    ));
+    let l_values = l.values().clone();
+    let r_values = r.values().clone();
+    let l_values = converter.convert_columns(&[l_values]).unwrap();
+    let r_values = converter.convert_columns(&[r_values]).unwrap();
+
+    // Might be worth adding an upstream OffsetBufferBuilder
+    let mut offsets = Vec::<OffsetSize>::with_capacity(l.len() + 1);
+    offsets.push(OffsetSize::usize_as(0));
+    let mut rows = Vec::with_capacity(l_values.num_rows() + 
r_values.num_rows());
+
+    for (l_w, r_w) in l.offsets().windows(2).zip(r.offsets().windows(2)) {
+        let mut dedup = HashSet::new();
+        // Needed to preserve ordering
+        let mut row_elements:Vec<Row<'_>> = vec![];
+        let l_slice = l_w[0].as_usize()..l_w[1].as_usize();
+        let r_slice = r_w[0].as_usize()..r_w[1].as_usize();
+        for i in l_slice {
+            let left_row = l_values.row(i);
+            if dedup.insert(left_row) {
+                row_elements.push(left_row);
+            }
+        }
+        for i in r_slice {
+            let right_row=r_values.row(i);
+            if dedup.insert(right_row){
+                row_elements.push(right_row);
+            }
+        }
+
+        rows.extend(row_elements.iter());
+        offsets.push(OffsetSize::usize_as(rows.len()));
+        dedup.clear();
+        row_elements.clear();
+    }
+
+    let values = converter.convert_rows(rows).unwrap();
+    let offsets = OffsetBuffer::new(offsets.into());
+    let result = values[0].clone();
+    Ok(GenericListArray::<OffsetSize>::new(
+        field, offsets, result, nulls,
+    ))
+}
+
+/// Array_union SQL function
+pub fn array_union(args: &[ArrayRef]) -> Result<ArrayRef> {
+    if args.len() != 2 {
+        return exec_err!("array_union needs two arguments");
+    }
+    let array1 = &args[0];
+    let array2 = &args[1];
+    match (array1.data_type(), array2.data_type()) {
+        (DataType::Null, _) => Ok(array2.clone()),
+        (_, DataType::Null) => Ok(array1.clone()),
+        (DataType::List(_), DataType::List(_)) => {
+            check_datatypes("array_union", &[&array1, &array2])?;
+            let list1 = array1.as_list::<i32>();

Review Comment:
   I just found that arrow::array::cast does not return Result<>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to