jayzhan211 commented on code in PR #12153:
URL: https://github.com/apache/datafusion/pull/12153#discussion_r1740026655
##########
datafusion/functions-nested/src/map.rs:
##########
@@ -51,24 +52,65 @@ fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool {
.all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
}
-fn make_map_batch(args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
if args.len() != 2 {
return exec_err!(
"make_map requires exactly 2 arguments, got {} instead",
args.len()
);
}
- let data_type = args[0].data_type();
let can_evaluate_to_const = can_evaluate_to_const(args);
- let key = get_first_array_ref(&args[0])?;
- let value = get_first_array_ref(&args[1])?;
- make_map_batch_internal(key, value, can_evaluate_to_const, data_type)
+
+ // check the keys array is unique
+ let keys = get_first_array_ref(&args[0])?;
+ let key_array = keys.as_ref();
+
+ match &args[0] {
+ ColumnarValue::Array(_) => {
+ let row_keys = match key_array.data_type() {
+ DataType::List(_) => list_to_arrays::<i32>(&keys),
+ DataType::LargeList(_) => list_to_arrays::<i64>(&keys),
+ DataType::FixedSizeList(_, _) => fixed_size_list_to_arrays(&keys),
+ data_type => {
+ return exec_err!(
+ "Expected list, large_list or fixed_size_list, got {:?}",
+ data_type
+ );
+ }
+ };
+
+ row_keys
+ .iter()
+ .try_for_each(|key| check_unique_keys(key.as_ref()))?;
+ }
+ ColumnarValue::Scalar(_) => {
+ check_unique_keys(key_array)?;
+ }
+ }
+
+ let values = get_first_array_ref(&args[1])?;
+ make_map_batch_internal(keys, values, can_evaluate_to_const, args[0].data_type())
+}
+
+fn check_unique_keys(array: &dyn Array) -> Result<()> {
Review Comment:
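Would it be faster to check for duplicate keys earlier, before `make_map_batch` runs — for example in the planner or in the `map` expression builder, at the points marked below?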
```rust
fn plan_make_map(&self, args: Vec<Expr>) ->
Result<PlannerResult<Vec<Expr>>> {
if args.len() % 2 != 0 {
return plan_err!("make_map requires an even number of arguments");
}
let (keys, values): (Vec<_>, Vec<_>) =
args.into_iter().enumerate().partition(|(i, _)| i % 2 == 0);
// check here?
let keys = make_array(keys.into_iter().map(|(_, e)| e).collect());
let values = make_array(values.into_iter().map(|(_, e)|
e).collect());
Ok(PlannerResult::Planned(Expr::ScalarFunction(
ScalarFunction::new_udf(map_udf(), vec![keys, values]),
)))
}
```
```rust
pub fn map(keys: Vec<Expr>, values: Vec<Expr>) -> Expr {
// check here
let keys = make_array(keys);
let values = make_array(values);
Expr::ScalarFunction(ScalarFunction::new_udf(map_udf(), vec![keys,
values]))
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]