adriangb commented on code in PR #19050:
URL: https://github.com/apache/datafusion/pull/19050#discussion_r2603231990
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -698,562 +750,370 @@ mod tests {
}};
}
- #[test]
- fn in_list_utf8() -> Result<()> {
- let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
- let a = StringArray::from(vec![Some("a"), Some("d"), None]);
- let col_a = col("a", &schema)?;
- let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
-
- // expression: "a in ("a", "b")"
- let list = vec![lit("a"), lit("b")];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), Some(false), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in ("a", "b")"
- let list = vec![lit("a"), lit("b")];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), Some(true), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a in ("a", "b", null)"
- let list = vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in ("a", "b", null)"
- let list = vec![lit("a"), lit("b"), lit(ScalarValue::Utf8(None))];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- Ok(())
+ /// Test case for primitive types following the standard IN LIST pattern.
+ ///
+ /// Each test case represents a data type with:
+ /// - `value_in`: A value that appears in both the test array and the IN
list (matches → true)
+ /// - `value_not_in`: A value that appears in the test array but NOT in
the IN list (doesn't match → false)
+ /// - `value_in_list`: A value that appears in the IN list but not in the
array (filler value)
+ /// - `null_value`: A null scalar value for NULL handling tests
+ struct InListPrimitiveTestCase {
+ name: &'static str,
+ value_in: ScalarValue,
+ value_not_in: ScalarValue,
+ value_in_list: ScalarValue,
+ null_value: ScalarValue,
}
- #[test]
- fn in_list_binary() -> Result<()> {
- let schema = Schema::new(vec![Field::new("a", DataType::Binary,
true)]);
- let a = BinaryArray::from(vec![
- Some([1, 2, 3].as_slice()),
- Some([1, 2, 2].as_slice()),
- None,
- ]);
- let col_a = col("a", &schema)?;
- let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
-
- // expression: "a in ([1, 2, 3], [4, 5, 6])"
- let list = vec![lit([1, 2, 3].as_slice()), lit([4, 5, 6].as_slice())];
- in_list!(
- batch,
- list.clone(),
- &false,
- vec![Some(true), Some(false), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in ([1, 2, 3], [4, 5, 6])"
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), Some(true), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a in ([1, 2, 3], [4, 5, 6], null)"
- let list = vec![
- lit([1, 2, 3].as_slice()),
- lit([4, 5, 6].as_slice()),
- lit(ScalarValue::Binary(None)),
- ];
- in_list!(
- batch,
- list.clone(),
- &false,
- vec![Some(true), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a in ([1, 2, 3], [4, 5, 6], null)"
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- Ok(())
+ /// Generic test data struct for primitive types.
+ ///
+ /// Holds the three test values needed for IN LIST tests, allowing the data
+ /// to be declared explicitly and reused across multiple types.
+ #[derive(Clone)]
+ struct PrimitiveTestCaseData<T> {
+ value_in: T,
+ value_not_in: T,
+ value_in_list: T,
}
- #[test]
- fn in_list_int64() -> Result<()> {
- let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
- let a = Int64Array::from(vec![Some(0), Some(2), None]);
- let col_a = col("a", &schema)?;
- let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
-
- // expression: "a in (0, 1)"
- let list = vec![lit(0i64), lit(1i64)];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), Some(false), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in (0, 1)"
- let list = vec![lit(0i64), lit(1i64)];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), Some(true), None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a in (0, 1, NULL)"
- let list = vec![lit(0i64), lit(1i64), lit(ScalarValue::Null)];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in (0, 1, NULL)"
- let list = vec![lit(0i64), lit(1i64), lit(ScalarValue::Null)];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- Ok(())
+ /// Helper to create test cases for any primitive type using generic data.
+ ///
+ /// Uses TryInto for flexible type conversion, allowing test data to be
+ /// declared in any convertible type (e.g., i32 for all integer types).
+ fn primitive_test_case<T, D, F>(
+ name: &'static str,
+ constructor: F,
+ data: PrimitiveTestCaseData<D>,
+ ) -> InListPrimitiveTestCase
+ where
+ D: TryInto<T>,
+ <D as TryInto<T>>::Error: Debug,
+ F: Fn(Option<T>) -> ScalarValue,
+ T: Clone,
+ {
+ InListPrimitiveTestCase {
+ name,
+ value_in: constructor(Some(data.value_in.try_into().unwrap())),
+ value_not_in:
constructor(Some(data.value_not_in.try_into().unwrap())),
+ value_in_list:
constructor(Some(data.value_in_list.try_into().unwrap())),
+ null_value: constructor(None),
+ }
}
- #[test]
- fn in_list_float64() -> Result<()> {
- let schema = Schema::new(vec![Field::new("a", DataType::Float64,
true)]);
- let a = Float64Array::from(vec![
- Some(0.0),
- Some(0.2),
- None,
- Some(f64::NAN),
- Some(-f64::NAN),
- ]);
- let col_a = col("a", &schema)?;
- let batch = RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::new(a)])?;
-
- // expression: "a in (0.0, 0.1)"
- let list = vec![lit(0.0f64), lit(0.1f64)];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), Some(false), None, Some(false), Some(false)],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in (0.0, 0.1)"
- let list = vec![lit(0.0f64), lit(0.1f64)];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), Some(true), None, Some(true), Some(true)],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a in (0.0, 0.1, NULL)"
- let list = vec![lit(0.0f64), lit(0.1f64), lit(ScalarValue::Null)];
- in_list!(
- batch,
- list,
- &false,
- vec![Some(true), None, None, None, None],
- Arc::clone(&col_a),
- &schema
- );
-
- // expression: "a not in (0.0, 0.1, NULL)"
- let list = vec![lit(0.0f64), lit(0.1f64), lit(ScalarValue::Null)];
- in_list!(
- batch,
- list,
- &true,
- vec![Some(false), None, None, None, None],
- Arc::clone(&col_a),
- &schema
- );
+ /// Runs test cases for multiple types, providing detailed SQL error
messages on failure.
+ ///
+ /// For each test case, runs 4 standard IN LIST scenarios and provides
context
+ /// about the test data and expected behavior when assertions fail.
+ fn run_test_cases(test_cases: Vec<InListPrimitiveTestCase>) -> Result<()> {
+ for test_case in test_cases {
+ let test_name = test_case.name;
+
+ // Get the data type from the scalar value
+ let data_type = test_case.value_in.data_type();
+ let schema = Schema::new(vec![Field::new("a", data_type.clone(),
true)]);
+
+ // Create array from scalar values: [value_in, value_not_in, None]
+ let array = ScalarValue::iter_to_array(vec![
+ test_case.value_in.clone(),
+ test_case.value_not_in.clone(),
+ test_case.null_value.clone(),
+ ])?;
+
+ let col_a = col("a", &schema)?;
+ let batch =
+ RecordBatch::try_new(Arc::new(schema.clone()),
vec![Arc::clone(&array)])?;
+
+ // Helper to format SQL-like representation for error messages
+ let _format_sql = |negated: bool, with_null: bool| -> String {
Review Comment:
it was a pipe dream to have nice error messages. I updated it and
implemented it. now if a test fails you get something like:
```
assertion `left == right` failed: Failed for: a IN (5, 3, 7)
a: PrimitiveArray<Int32>
[
1,
3,
4,
]
left: BooleanArray
[
true,
true,
true,
]
right: BooleanArray
[
false,
true,
false,
]
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]