jayzhan211 commented on code in PR #7897:
URL: https://github.com/apache/arrow-datafusion/pull/7897#discussion_r1378689518
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1478,6 +1481,69 @@ macro_rules! to_string {
}};
}
+fn deduplicate_array(arg:ArrayRef) -> Result<ArrayRef> { // Intended to drop repeated rows of the list array `arg`, keeping the first occurrence of each.
+ let list_arr = as_list_array(&arg)?; // `?` propagates the error if `arg` cannot be viewed as a list array
+ let row_number = list_arr.len();
+ for i in 0..row_number { // NOTE(review): this loop has an empty inner body and does not feed into the returned value
+ let arr = list_arr.value(i);
+ let i64arr = as_primitive_array::<Int64Type>(&arr); // NOTE(review): hard-codes Int64 elements; presumably fails for other element types - confirm
+ for v in i64arr.iter() {
+ // v is Option<i64>; nothing is done with it here
+ }
+ }
+ let row_converter = RowConverter::new(vec![ // single-column converter keyed on the data type of `arg`
+ SortField::new(
+ arg.data_type().clone()
+ )
+ ]
+ )?;
+ let converted = row_converter.convert_columns(&[arg])?; // `arg` is the only column, so each row is one top-level entry of `arg`. NOTE(review): whole lists are compared, not the elements inside them - confirm this is what array_union needs
+ let mut distinct_rows = row_converter.empty_rows(converted.num_rows(),
converted.size());
+ let mut dedup: HashSet<Row> = HashSet::with_capacity(converted.num_rows()); // rows already seen
+ converted.iter().filter(|row| dedup.insert(*row)).for_each(|row| // `insert` returns true only the first time a row is seen
distinct_rows.push(row));
+ let dedup = row_converter.convert_rows(&distinct_rows)?; // decode the surviving rows back into columns
+ let res = make_array(dedup.as_slice())?; // NOTE(review): `make_array` is defined outside this hunk; confirm it accepts the decoded columns as-is
+
+ Ok(res)
+}
+
+
+/// Array_union SQL function
+pub fn array_union(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 2 {
+ return exec_err!("array_union needs two arguments")
+ }
+ let array1 = &args[0];
+ let array2= &args[1];
+
+ check_datatypes("array_union", &[array1, array2])?;
+ let list1 = as_list_array(array1)?;
+ let list2 = as_list_array(array2)?;
+
+ match (list1.value_type(), list2.value_type()){
+ (DataType::Null, _) => {
+ Ok(array2.clone())
+ },
+ (_, DataType::Null) => {
+ Ok(array1.clone())
+ }
+ (DataType::List(_), DataType::List(_)) => {
+ let result = concat_internal(args)?;
+ deduplicate_array(result)
+ },
+ // These are the same as confirmed from check_datatypes
+ (_data_type1, _data_type_2) => {
+ eprintln!("Array1 {:?}",array1);
+ eprintln!("Array2 {:?}",array2);
+ let arrays = vec![array1.as_ref(), array2.as_ref()];
+ let result = arrow::compute::concat(arrays.as_slice())?;
+ eprintln!("Result {:?}",result);
+ deduplicate_array(result)
Review Comment:
> Even assuming deduplication now doesn't work, I get the following error
when running
>
> ```sh
> RUST_BACKTRACE=1 cargo test -p datafusion-sqllogictest --test sqllogictests
> ```
>
> ```rust
> External error: query columns mismatch:
> [SQL] select array_union([1, 2, 3, 4], [5, 6, 3, 4]);
> [Expected] [T][T][T]
> [Actual ] [?]
> at test_files/array.slt:1748
> ```
>
> I am not sure what the root cause is; my change in
expr/src/built_in_function.rs seemed reasonable.
>
I think it's because your return type isn't correct. Could you show me your
code? It seems you haven't pushed your latest change.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]