jcsherin commented on code in PR #12811:
URL: https://github.com/apache/datafusion/pull/12811#discussion_r1799868720
##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -257,31 +257,55 @@ fn create_built_in_window_expr(
}
}
BuiltInWindowFunction::Lag => {
- let arg = Arc::clone(&args[0]);
+ let mut arg = Arc::clone(&args[0]);
let shift_offset = get_scalar_value_from_args(args, 1)?
.map(get_signed_integer)
.map_or(Ok(None), |v| v.map(Some))?;
- let default_value =
- get_casted_value(get_scalar_value_from_args(args, 2)?,
out_data_type)?;
+ // If value is NULL, we use default data type as output data type,
no need to cast data type
+ let default_value = match out_data_type {
+ DataType::Null => match get_scalar_value_from_args(args, 2)? {
+ Some(value) => {
+ let null_value =
ScalarValue::try_from(value.data_type())?;
+ arg = Arc::new(Literal::new(null_value));
+ value
+ }
+ None => ScalarValue::try_from(DataType::Null)?,
+ },
+ _ => {
+ get_casted_value(get_scalar_value_from_args(args, 2)?,
out_data_type)?
+ }
+ };
Review Comment:
After thinking about this for some more time, I found a better way which
requires no changes to `get_casted_value`.
We just need to rewrite the expression (1st argument) and the return type
which is same as the datatype of the expression, when NULL is passed. The key
here is also rewriting the return type, not just the expression. This allows us
the old code for parsing the default value without any further modifications.
### 1. Parsing window function arguments
```rust
BuiltInWindowFunction::Lag => {
let (arg, out_data_type) =
rewrite_null_expr_and_data_type(args, out_data_type); // <-
rewrite NULL expression and the return datatype
let shift_offset = get_scalar_value_from_args(args, 1)?
.map(get_signed_integer)
.map_or(Ok(None), |v| v.map(Some))?;
let default_value =
get_casted_value(get_scalar_value_from_args(args, 2)?,
&out_data_type)?; // <- No changes needed here. Same as earlier. See diff
(deleted code).
Arc::new(lag(
name,
default_value.data_type().clone(),
arg,
shift_offset,
default_value,
ignore_nulls,
))
}
```
### 2. Rewrite NULL and return data type
```rust
fn rewrite_null_expr_and_data_type(
args: &[Arc<dyn PhysicalExpr>],
expr_type: &DataType,
) -> (Arc<dyn PhysicalExpr>, DataType) {
let expr = Arc::clone(&args[0]);
// The input expression and the return is type is unchanged
// when the input expression is not NULL.
if !expr_type.is_null() {
return (expr, expr_type.clone());
}
// Rewrites the NULL expression (1st argument) with an expression
// which is the same data type as the default value (3rd argument).
// Also rewrites the return type with the same data type as the
// default value.
//
// If a default value is not provided, or it is NULL the original
// expression (1st argument) and return type is returned without
// any modifications.
get_scalar_value_from_args(args, 2)
.unwrap()
.and_then(|value| {
ScalarValue::try_from(value.data_type().clone())
.map(|sv| {
(
Arc::new(Literal::new(sv)) as Arc<dyn PhysicalExpr>,
value.data_type().clone(),
)
})
.ok()
})
.unwrap_or((expr, expr_type.clone()))
}
```
I've verified locally and these code changes will all the newly added
sqllogictests in this PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]