alamb commented on code in PR #15694: URL: https://github.com/apache/datafusion/pull/15694#discussion_r2049350694
########## datafusion/physical-expr/src/expressions/binary.rs: ########## @@ -4911,20 +5065,245 @@ mod tests { .unwrap(); // op: AND left: all false - let left_expr = logical2physical(&logical_col("a").eq(lit(2)), &schema); + let left_expr = logical2physical(&logical_col("a").eq(expr_lit(2)), &schema); let left_value = left_expr.evaluate(&batch).unwrap(); - assert!(check_short_circuit(&left_value, &Operator::And)); + assert!(matches!( + check_short_circuit(&left_value, &Operator::And), + ShortCircuitStrategy::ReturnLeft + )); + // op: AND left: not all false - let left_expr = logical2physical(&logical_col("a").eq(lit(3)), &schema); + let left_expr = logical2physical(&logical_col("a").eq(expr_lit(3)), &schema); let left_value = left_expr.evaluate(&batch).unwrap(); - assert!(!check_short_circuit(&left_value, &Operator::And)); + let ColumnarValue::Array(array) = &left_value else { + panic!("Expected ColumnarValue::Array"); + }; + let ShortCircuitStrategy::PreSelection(value) = + check_short_circuit(&left_value, &Operator::And) + else { + panic!("Expected ShortCircuitStrategy::PreSelection"); + }; + let expected_boolean_arr: Vec<_> = + as_boolean_array(array).unwrap().iter().collect(); + let boolean_arr: Vec<_> = value.iter().collect(); + assert_eq!(expected_boolean_arr, boolean_arr); + // op: OR left: all true - let left_expr = logical2physical(&logical_col("a").gt(lit(0)), &schema); + let left_expr = logical2physical(&logical_col("a").gt(expr_lit(0)), &schema); let left_value = left_expr.evaluate(&batch).unwrap(); - assert!(check_short_circuit(&left_value, &Operator::Or)); + assert!(matches!( + check_short_circuit(&left_value, &Operator::Or), + ShortCircuitStrategy::ReturnLeft + )); + // op: OR left: not all true - let left_expr = logical2physical(&logical_col("a").gt(lit(2)), &schema); + let left_expr: Arc<dyn PhysicalExpr> = + logical2physical(&logical_col("a").gt(expr_lit(2)), &schema); let left_value = left_expr.evaluate(&batch).unwrap(); - assert!(!check_short_circuit(&left_value, &Operator::Or)); + assert!(matches!( + check_short_circuit(&left_value, &Operator::Or), + ShortCircuitStrategy::None + )); + + // Test with nullable arrays and null values + let schema_nullable = Arc::new(Schema::new(vec![ + Field::new("c", DataType::Boolean, true), + Field::new("d", DataType::Boolean, true), + ])); + + // Create arrays with null values + let c_array = Arc::new(BooleanArray::from(vec![ + Some(true), + Some(false), + None, + Some(true), + None, + ])) as ArrayRef; + let d_array = Arc::new(BooleanArray::from(vec![ + Some(false), + Some(true), + Some(false), + None, + Some(true), + ])) as ArrayRef; + + let batch_nullable = RecordBatch::try_new( + Arc::clone(&schema_nullable), + vec![Arc::clone(&c_array), Arc::clone(&d_array)], + ) + .unwrap(); + + // Case: Mixed values with nulls - shouldn't short-circuit for AND + let mixed_nulls = logical2physical(&logical_col("c"), &schema_nullable); + let mixed_nulls_value = mixed_nulls.evaluate(&batch_nullable).unwrap(); + assert!(matches!( + check_short_circuit(&mixed_nulls_value, &Operator::And), + ShortCircuitStrategy::None + )); + + // Case: Mixed values with nulls - shouldn't short-circuit for OR + assert!(matches!( + check_short_circuit(&mixed_nulls_value, &Operator::Or), + ShortCircuitStrategy::None + )); + + // Test with all nulls + let all_nulls = Arc::new(BooleanArray::from(vec![None, None, None])) as ArrayRef; + let null_batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new("e", DataType::Boolean, true)])), + vec![all_nulls], + ) + .unwrap(); + + let null_expr = logical2physical(&logical_col("e"), &null_batch.schema()); + let null_value = null_expr.evaluate(&null_batch).unwrap(); + + // All nulls shouldn't short-circuit for AND or OR + assert!(matches!( + check_short_circuit(&null_value, &Operator::And), + ShortCircuitStrategy::None + )); + assert!(matches!( + check_short_circuit(&null_value, &Operator::Or), + ShortCircuitStrategy::None + )); + + // Test with scalar values + // Scalar true + let scalar_true = ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))); + assert!(matches!( + check_short_circuit(&scalar_true, &Operator::Or), + ShortCircuitStrategy::ReturnLeft + )); // Should short-circuit OR + assert!(matches!( + check_short_circuit(&scalar_true, &Operator::And), + ShortCircuitStrategy::ReturnRight + )); // Should return the RHS for AND + + // Scalar false + let scalar_false = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))); + assert!(matches!( + check_short_circuit(&scalar_false, &Operator::And), + ShortCircuitStrategy::ReturnLeft + )); // Should short-circuit AND + assert!(matches!( + check_short_circuit(&scalar_false, &Operator::Or), + ShortCircuitStrategy::ReturnRight + )); // Should return the RHS for OR + + // Scalar null + let scalar_null = ColumnarValue::Scalar(ScalarValue::Boolean(None)); + assert!(matches!( + check_short_circuit(&scalar_null, &Operator::And), + ShortCircuitStrategy::None + )); + assert!(matches!( + check_short_circuit(&scalar_null, &Operator::Or), + ShortCircuitStrategy::None + )); + } + + /// Test for [pre_selection_scatter] + /// Since [check_short_circuit] ensures that the left side does not contain null and is neither all_true nor all_false, as well as not being empty, + /// the following tests have been designed: + /// 1. Test sparse left with interleaved true/false + /// 2. Test multiple consecutive true blocks + /// 3. Test multiple consecutive true blocks + /// 4. Test single true at first position + /// 5. Test single true at last position + /// 6. Test nulls in right array + /// 7. Test scalar right handling + #[test] + fn test_pre_selection_scatter() { + fn create_bool_array(bools: Vec<bool>) -> BooleanArray { + BooleanArray::from(bools.into_iter().map(Some).collect::<Vec<_>>()) + } + // Test sparse left with interleaved true/false Review Comment: 😍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org