Re: [PR] fix: default UDWFImpl::expressions returns all expressions [datafusion]

via GitHub Wed, 30 Oct 2024 05:10:52 -0700


jcsherin commented on PR #13169:
URL: https://github.com/apache/datafusion/pull/13169#issuecomment-2446924878


   Since the testing approach made sense, I rewrote it to be a bit more 
exhaustive than the initial iteration. 
   
   I added more test cases to cover when 0, 1, 2 and 3 input expressions are 
provided. Earlier version only checks for exactly 3 arguments and I think that 
is insufficient testing to avoid any regressions in the future.
   
   It passes in `main` when there are only 0 or 1 argument. It fails when 2 or 
more arguments are provided. The failing test cases will only pass with 
@Michael-J-Ward's fix. 
   
   ```console
   $ cargo test -p datafusion-functions-window test_default_expressions
   
   running 1 test
   test tests::test_default_expressions ... FAILED
   
   failures:
   
   ---- tests::test_default_expressions stdout ----
   thread 'tests::test_default_expressions' panicked at 
datafusion/functions-window/src/lib.rs:192:13:
   assertion `left == right` failed:
   Input expressions: [Column { name: "a", index: 0 }, Column { name: "b", 
index: 1 }]
   Returned expressions: [Column { name: "a", index: 0 }]
     left: 2
    right: 1
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   
   
   failures:
       tests::test_default_expressions
   
   test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 10 filtered 
out; finished in 0.00s
   
   ```
   
   The same limitation as earlier applies. It is not possible to place this 
unit test in `datafusion/expr/src/udwf.rs` because of this dependency: 
`datafusion_physical_expr::expressions::lit`.
   
   I think we can add this to the `functions-window` crate.
   
   ---
   
   @timsaucer Please feel free to make any improvements you feel would be 
beneficial.
   
   Code:
   
   ```rust
   #[cfg(test)]
   mod tests {
       use arrow::datatypes::{DataType, Field, Schema};
       use datafusion_common::Result;
       use datafusion_expr::{
           PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDF,
           WindowUDFImpl,
       };
       use datafusion_functions_window_common::expr::ExpressionArgs;
       use datafusion_functions_window_common::field::WindowUDFFieldArgs;
       use 
datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
       use datafusion_physical_expr::expressions::{col, lit};
       use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
       use std::any::Any;
   
       #[derive(Debug)]
       struct VariadicWindowUDF {
           signature: Signature,
       }
   
       impl VariadicWindowUDF {
           fn new() -> Self {
               Self {
                   signature: Signature::one_of(
                       vec![
                           TypeSignature::Any(0),
                           TypeSignature::Any(1),
                           TypeSignature::Any(2),
                           TypeSignature::Any(3),
                       ],
                       Volatility::Immutable,
                   ),
               }
           }
       }
   
       impl WindowUDFImpl for VariadicWindowUDF {
           fn as_any(&self) -> &dyn Any {
               self
           }
   
           fn name(&self) -> &str {
               "variadic_window_udf"
           }
   
           fn signature(&self) -> &Signature {
               &self.signature
           }
   
           fn partition_evaluator(
               &self,
               _: PartitionEvaluatorArgs,
           ) -> Result<Box<dyn PartitionEvaluator>> {
               unimplemented!("unnecessary for testing");
           }
   
           fn field(&self, _: WindowUDFFieldArgs) -> Result<Field> {
               unimplemented!("unnecessary for testing");
           }
       }
   
       #[test]
       // Fixes: default implementation of `WindowUDFImpl::expressions`
       // returns all input expressions to the user-defined window
       // function unmodified.
       //
       // See: https://github.com/apache/datafusion/pull/13169
       fn test_default_expressions() -> Result<()> {
           let udwf = WindowUDF::from(VariadicWindowUDF::new());
   
           let field_a = Field::new("a", DataType::Int32, false);
           let field_b = Field::new("b", DataType::Float32, false);
           let field_c = Field::new("c", DataType::Boolean, false);
           let schema = Schema::new(vec![field_a, field_b, field_c]);
   
           let test_cases = vec![
               //
               // Zero arguments
               //
               vec![],
               //
               // Single argument
               //
               vec![col("a", &schema)?],
               vec![lit(1)],
               //
               // Two arguments
               //
               vec![col("a", &schema)?, col("b", &schema)?],
               vec![col("a", &schema)?, lit(2)],
               vec![lit(false), col("a", &schema)?],
               //
               // Three arguments
               //
               vec![col("a", &schema)?, col("b", &schema)?, col("c", &schema)?],
               vec![col("a", &schema)?, col("b", &schema)?, lit(false)],
               vec![col("a", &schema)?, lit(0.5), col("c", &schema)?],
               vec![lit(3), col("b", &schema)?, col("c", &schema)?],
           ];
   
           for input_exprs in &test_cases {
               let input_types = input_exprs
                   .iter()
                   .map(|expr: &std::sync::Arc<dyn PhysicalExpr>| {
                       expr.data_type(&schema).unwrap()
                   })
                   .collect::<Vec<_>>();
               let expr_args = ExpressionArgs::new(input_exprs, &input_types);
   
               let ret_exprs = udwf.expressions(expr_args);
   
               // Verify same number of input expressions are returned
               assert_eq!(
                   input_exprs.len(),
                   ret_exprs.len(),
                   "\nInput expressions: {:?}\nReturned expressions: {:?}",
                   input_exprs,
                   ret_exprs
               );
   
               // Compares each returned expression with original input 
expressions
               for (expected, actual) in input_exprs.iter().zip(&ret_exprs) {
                   assert_eq!(
                       format!("{expected:?}"),
                       format!("{actual:?}"),
                       "\nInput expressions: {:?}\nReturned expressions: {:?}",
                       input_exprs,
                       ret_exprs
                   );
               }
           }
           Ok(())
       }
   }
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] fix: default UDWFImpl::expressions returns all expressions [datafusion]

Reply via email to