feniljain commented on PR #19415: URL: https://github.com/apache/datafusion/pull/19415#issuecomment-3677957617
Hey @Jefffrey 👋🏻 Upon looking into this a bit more, it seems one of the limitations to returning `NULL` here stems from `make_scalar_function` expecting a function returning `Result<ArrayRef>` [source](https://github.com/apache/datafusion/blob/d8e68a404aa7955b4913bab1c658a4ec5c82f0fc/datafusion/functions-nested/src/utils.rs#L53-L58). We can't change this function as it's widely used by a lot of functions; dumping the list I received here: ``` datafusion/functions-nested/src/array_has.rs|38 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/array_has.rs|541 col 9-29| make_scalar_function(array_has_all_inner)(&args.args) datafusion/functions-nested/src/array_has.rs|615 col 9-29| make_scalar_function(array_has_any_inner)(&args.args) datafusion/functions-nested/src/cardinality.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/cardinality.rs|112 col 9-29| make_scalar_function(cardinality_inner)(&args.args) datafusion/functions-nested/src/concat.rs|24 col 61-81| use crate::utils::{align_array_dimensions, check_datatypes, make_scalar_function}; datafusion/functions-nested/src/concat.rs|124 col 9-29| make_scalar_function(array_append_inner)(&args.args) datafusion/functions-nested/src/concat.rs|213 col 9-29| make_scalar_function(array_prepend_inner)(&args.args) datafusion/functions-nested/src/concat.rs|333 col 9-29| make_scalar_function(array_concat_inner)(&args.args) datafusion/functions-nested/src/dimension.rs|33 col 40-60| use crate::utils::{compute_array_dims, make_scalar_function}; datafusion/functions-nested/src/dimension.rs|108 col 9-29| make_scalar_function(array_dims_inner)(&args.args) datafusion/functions-nested/src/dimension.rs|180 col 9-29| make_scalar_function(array_ndims_inner)(&args.args) datafusion/functions-nested/src/distance.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/distance.rs|132 col 9-29| make_scalar_function(array_distance_inner)(&args.args) 
datafusion/functions-nested/src/empty.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/empty.rs|101 col 9-29| make_scalar_function(array_empty_inner)(&args.args) datafusion/functions-nested/src/except.rs|20 col 37-57| use crate::utils::{check_datatypes, make_scalar_function}; datafusion/functions-nested/src/except.rs|117 col 9-29| make_scalar_function(array_except_inner)(&args.args) datafusion/functions-nested/src/extract.rs|51 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/extract.rs|179 col 9-29| make_scalar_function(array_element_inner)(&args.args) datafusion/functions-nested/src/extract.rs|402 col 9-29| make_scalar_function(array_slice_inner)(&args.args) datafusion/functions-nested/src/extract.rs|849 col 9-29| make_scalar_function(array_pop_front_inner)(&args.args) datafusion/functions-nested/src/extract.rs|945 col 9-29| make_scalar_function(array_pop_back_inner)(&args.args) datafusion/functions-nested/src/extract.rs|1050 col 9-29| make_scalar_function(array_any_value_inner)(&args.args) datafusion/functions-nested/src/flatten.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/flatten.rs|121 col 9-29| make_scalar_function(flatten_inner)(&args.args) datafusion/functions-nested/src/length.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/length.rs|124 col 9-29| make_scalar_function(array_length_inner)(&args.args) datafusion/functions-nested/src/make_array.rs|24 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/make_array.rs|120 col 9-29| make_scalar_function(make_array_inner)(&args.args) datafusion/functions-nested/src/map_entries.rs|20 col 41-61| use crate::utils::{get_map_entry_field, make_scalar_function}; datafusion/functions-nested/src/map_entries.rs|118 col 9-29| make_scalar_function(map_entries_inner)(&args.args) datafusion/functions-nested/src/map_extract.rs|20 col 41-61| 
use crate::utils::{get_map_entry_field, make_scalar_function}; datafusion/functions-nested/src/map_extract.rs|117 col 9-29| make_scalar_function(map_extract_inner)(&args.args) datafusion/functions-nested/src/map_keys.rs|20 col 41-61| use crate::utils::{get_map_entry_field, make_scalar_function}; datafusion/functions-nested/src/map_keys.rs|108 col 9-29| make_scalar_function(map_keys_inner)(&args.args) datafusion/functions-nested/src/map_values.rs|20 col 41-61| use crate::utils::{get_map_entry_field, make_scalar_function}; datafusion/functions-nested/src/map_values.rs|118 col 9-29| make_scalar_function(map_values_inner)(&args.args) datafusion/functions-nested/src/min_max.rs|19 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/min_max.rs|104 col 9-29| make_scalar_function(array_max_inner)(&args.args) datafusion/functions-nested/src/min_max.rs|190 col 9-29| make_scalar_function(array_min_inner)(&args.args) datafusion/functions-nested/src/position.rs|45 col 45-65| use crate::utils::{compare_element_to_list, make_scalar_function}; datafusion/functions-nested/src/position.rs|132 col 9-29| make_scalar_function(array_position_inner)(&args.args) datafusion/functions-nested/src/position.rs|283 col 9-29| make_scalar_function(array_positions_inner)(&args.args) datafusion/functions-nested/src/range.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/range.rs|266 col 17-37| make_scalar_function(|args| self.gen_range_inner(args))(args) datafusion/functions-nested/src/range.rs|269 col 17-37| make_scalar_function(|args| self.gen_range_date(args))(args) datafusion/functions-nested/src/range.rs|272 col 17-37| make_scalar_function(|args| self.gen_range_timestamp(args))(args) datafusion/functions-nested/src/remove.rs|21 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/remove.rs|117 col 9-29| make_scalar_function(array_remove_inner)(&args.args) 
datafusion/functions-nested/src/remove.rs|212 col 9-29| make_scalar_function(array_remove_n_inner)(&args.args) datafusion/functions-nested/src/remove.rs|296 col 9-29| make_scalar_function(array_remove_all_inner)(&args.args) datafusion/functions-nested/src/repeat.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/repeat.rs|122 col 9-29| make_scalar_function(array_repeat_inner)(&args.args) datafusion/functions-nested/src/replace.rs|37 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/replace.rs|136 col 9-29| make_scalar_function(array_replace_inner)(&args.args) datafusion/functions-nested/src/replace.rs|218 col 9-29| make_scalar_function(array_replace_n_inner)(&args.args) datafusion/functions-nested/src/replace.rs|298 col 9-29| make_scalar_function(array_replace_all_inner)(&args.args) datafusion/functions-nested/src/resize.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/resize.rs|143 col 9-29| make_scalar_function(array_resize_inner)(&args.args) datafusion/functions-nested/src/reverse.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/reverse.rs|111 col 9-29| make_scalar_function(array_reverse_inner)(&args.args) datafusion/functions-nested/src/set_ops.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/set_ops.rs|149 col 9-29| make_scalar_function(array_union_inner)(&args.args) datafusion/functions-nested/src/set_ops.rs|234 col 9-29| make_scalar_function(array_intersect_inner)(&args.args) datafusion/functions-nested/src/set_ops.rs|299 col 9-29| make_scalar_function(array_distinct_inner)(&args.args) datafusion/functions-nested/src/sort.rs|20 col 19-39| use crate::utils::make_scalar_function; datafusion/functions-nested/src/sort.rs|155 col 9-29| make_scalar_function(array_sort_inner)(&args.args) datafusion/functions-nested/src/string.rs|33 col 19-39| use 
crate::utils::make_scalar_function; datafusion/functions-nested/src/string.rs|216 col 9-29| make_scalar_function(array_to_string_inner)(&args.args) datafusion/functions-nested/src/string.rs|315 col 32-52| Utf8 | Utf8View => make_scalar_function(string_to_array_inner::<i32>)(args), datafusion/functions-nested/src/string.rs|316 col 26-46| LargeUtf8 => make_scalar_function(string_to_array_inner::<i64>)(args), datafusion/functions-nested/src/utils.rs|53 col 15-35| pub(crate) fn make_scalar_function<F>( ``` This raises two points in my head: - Should we check the implementation of each of these functions above and see how `NULL` is handled in each of them? - If we decide to just fix `array_intersect` and `array_union`, we would have to create a new function similar to `make_scalar_function` which can return `NULL` too. Either way, this seems like it needs a new PR; I can add an `array_union` test and we can continue work on fixing support for all of these (I can file a new issue). What do you think? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
