alamb commented on a change in pull request #9565: URL: https://github.com/apache/arrow/pull/9565#discussion_r583024015
########## File path: rust/datafusion/src/physical_plan/type_coercion.rs ########## @@ -168,20 +168,35 @@ fn maybe_data_types( pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool { use self::DataType::*; match type_into { - Int8 => matches!(type_from, Int8), - Int16 => matches!(type_from, Int8 | Int16 | UInt8), - Int32 => matches!(type_from, Int8 | Int16 | Int32 | UInt8 | UInt16), + Int8 => matches!(type_from, Int8 | Utf8 | LargeUtf8), Review comment: This code seems inconsistent with the comments of `can_coerce_from`: ``` /// Return true if a value of type `type_from` can be coerced /// (losslessly converted) into a value of `type_to` ``` What would happen if we tried to coerce `"foo"` into an `Int8`? I think the correct answer is that that we should get a runtime error. I worry that we would end up silently converting to `Null`. Can you explain why these coercion rules are needed? And if we decide they are, can we please update the comments of this function to reflect the newfound understanding? @jorgecarleitao I think did some previous work in this area, so he may have some relevant perspective. cc @andygrove in case you are interested ########## File path: rust/datafusion/tests/sql.rs ########## @@ -530,17 +530,6 @@ async fn sqrt_f32_vs_f64() -> Result<()> { Ok(()) } -#[tokio::test] -async fn csv_query_error() -> Result<()> { - // sin(utf8) should error - let mut ctx = create_ctx()?; - register_aggregate_csv(&mut ctx)?; - let sql = "SELECT sin(c1) FROM aggregate_test_100"; - let plan = ctx.create_logical_plan(&sql); - assert!(plan.is_err()); Review comment: I think we should leave this test in (even if we decide that `sin(utf8)` should not error) and update it to show the expected results as a way of documenting the expected behavior. ########## File path: rust/datafusion/src/physical_plan/functions.rs ########## @@ -1137,295 +1259,831 @@ mod tests { Float64Array ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], - Ok(Some("trim")), + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int8(Some(2))), + ], + Ok(Some("ab")), &str, Utf8, StringArray ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], - Ok(Some("trim ")), + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int64(Some(200))), + ], + Ok(Some("abcde")), &str, Utf8, StringArray ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], - Ok(Some("trim ")), + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int64(Some(-2))), + ], + Ok(Some("abc")), &str, Utf8, StringArray ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(Some("trim".to_string())))], - Ok(Some("trim")), + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int64(Some(-200))), + ], + Ok(Some("")), &str, Utf8, StringArray ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))], - Ok(Some("\n trim ")), + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int64(Some(0))), + ], + Ok(Some("")), &str, Utf8, StringArray ); test_function!( - Ltrim, - &[lit(ScalarValue::Utf8(None))], + Left, + &[ + lit(ScalarValue::Utf8(None)), + lit(ScalarValue::Int64(Some(2))), + ], Ok(None), &str, Utf8, StringArray ); test_function!( - OctetLength, - &[lit(ScalarValue::Utf8(Some("chars".to_string())))], - Ok(Some(5)), - i32, - Int32, - Int32Array - ); - test_function!( - OctetLength, - &[lit(ScalarValue::Utf8(Some("josé".to_string())))], - Ok(Some(5)), - i32, - Int32, - Int32Array + Left, + &[ + lit(ScalarValue::Utf8(Some("abcde".to_string()))), + lit(ScalarValue::Int64(None)), + ], + Ok(None), + &str, + Utf8, + StringArray ); test_function!( - OctetLength, - &[lit(ScalarValue::Utf8(Some("".to_string())))], - Ok(Some(0)), - i32, - Int32, - Int32Array + Left, + &[ + lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), + lit(ScalarValue::Int64(Some(5))), + ], + Ok(Some("joséé")), + &str, + Utf8, + StringArray ); test_function!( - OctetLength, - &[lit(ScalarValue::Utf8(None))], - Ok(None), - i32, - Int32, - Int32Array + Left, + &[ + lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), + lit(ScalarValue::Int64(Some(-3))), + ], + Ok(Some("joséé")), + &str, + Utf8, + StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(0))), + lit(ScalarValue::Utf8(Some("josé".to_string()))), + lit(ScalarValue::Int64(Some(5))), ], - Ok(Some("alphabet")), + Ok(Some(" josé")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(Some(5))), ], - Ok(Some("ésoj")), + Ok(Some(" hi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(1))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(0))), ], - Ok(Some("alphabet")), + Ok(Some("")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(2))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(None)), ], - Ok(Some("lphabet")), + Ok(None), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(3))), + lit(ScalarValue::Utf8(None)), + lit(ScalarValue::Int64(Some(5))), ], - Ok(Some("phabet")), + Ok(None), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(-3))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(5))), + lit(ScalarValue::Utf8(Some("xy".to_string()))), ], - Ok(Some("alphabet")), + Ok(Some("xyxhi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(30))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(21))), + lit(ScalarValue::Utf8(Some("abcdef".to_string()))), ], - Ok(Some("")), + Ok(Some("abcdefabcdefabcdefahi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(None)), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(5))), + lit(ScalarValue::Utf8(Some(" ".to_string()))), ], - Ok(None), + Ok(Some(" hi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(3))), - lit(ScalarValue::Int64(Some(2))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(5))), + lit(ScalarValue::Utf8(Some("".to_string()))), ], - Ok(Some("ph")), + Ok(Some("hi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(3))), - lit(ScalarValue::Int64(Some(20))), + lit(ScalarValue::Utf8(None)), + lit(ScalarValue::Int64(Some(5))), + lit(ScalarValue::Utf8(Some("xy".to_string()))), ], - Ok(Some("phabet")), + Ok(None), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), lit(ScalarValue::Int64(None)), - lit(ScalarValue::Int64(Some(20))), + lit(ScalarValue::Utf8(Some("xy".to_string()))), ], Ok(None), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(3))), - lit(ScalarValue::Int64(None)), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Int64(Some(5))), + lit(ScalarValue::Utf8(None)), ], Ok(None), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("alphabet".to_string()))), - lit(ScalarValue::Int64(Some(1))), - lit(ScalarValue::Int64(Some(-1))), + lit(ScalarValue::Utf8(Some("hi".to_string()))), + lit(ScalarValue::Utf8(Some("5".to_string()))), + lit(ScalarValue::Utf8(Some("xy".to_string()))), ], - Err(DataFusionError::Execution( - "negative substring length not allowed".to_string(), - )), + Ok(Some("xyxhi")), &str, Utf8, StringArray ); test_function!( - Substr, + Lpad, &[ - lit(ScalarValue::Utf8(Some("joséésoj".to_string()))), - lit(ScalarValue::Int64(Some(5))), - lit(ScalarValue::Int64(Some(2))), + lit(ScalarValue::Utf8(Some("josé".to_string()))), + lit(ScalarValue::Int64(Some(10))), + lit(ScalarValue::Utf8(Some("xy".to_string()))), ], - Ok(Some("és")), + Ok(Some("xyxyxyjosé")), &str, Utf8, StringArray ); test_function!( - Rtrim, - &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], - Ok(Some("trim")), + Lpad, + &[ + lit(ScalarValue::Utf8(Some("josé".to_string()))), + lit(ScalarValue::Int64(Some(10))), + lit(ScalarValue::Utf8(Some("éñ".to_string()))), + ], + Ok(Some("éñéñéñjosé")), &str, Utf8, StringArray ); test_function!( - Rtrim, - &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], - Ok(Some(" trim")), + Ltrim, + &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], + Ok(Some("trim")), &str, Utf8, StringArray ); test_function!( - Rtrim, - &[lit(ScalarValue::Utf8(Some(" trim \n".to_string())))], - Ok(Some(" trim \n")), + Ltrim, + &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))], + Ok(Some("trim ")), &str, Utf8, StringArray ); test_function!( - Rtrim, - &[lit(ScalarValue::Utf8(Some(" trim".to_string())))], - Ok(Some(" trim")), + Ltrim, + &[lit(ScalarValue::Utf8(Some("trim ".to_string())))], + Ok(Some("trim ")), &str, Utf8, StringArray ); test_function!( - Rtrim, + Ltrim, &[lit(ScalarValue::Utf8(Some("trim".to_string())))], Ok(Some("trim")), &str, Utf8, StringArray ); test_function!( - Rtrim, + Ltrim, + &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))], + Ok(Some("\n trim ")), + &str, + Utf8, + StringArray + ); + test_function!( Review comment: I love these tests ########## File path: rust/datafusion/tests/sql.rs ########## @@ -2079,11 +2104,11 @@ async fn test_string_expressions() -> Result<()> { test_expression!("substr('alphabet', 2)", "lphabet"); test_expression!("substr('alphabet', 3)", "phabet"); test_expression!("substr('alphabet', 30)", ""); - test_expression!("substr('alphabet', CAST(NULL AS int))", "NULL"); Review comment: 👍 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org