alamb commented on code in PR #10148: URL: https://github.com/apache/datafusion/pull/10148#discussion_r1583614996
########## datafusion/sqllogictest/test_files/map.slt: ########## @@ -44,6 +44,7 @@ DELETE 24 query T Review Comment: I had to remind myself what this data looked like. Here it is for anyone else who may be interested ```sql DataFusion CLI v37.1.0 > select * from 'datafusion/core/tests/data/parquet_map.parquet'; +----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+ | ints | strings | timestamp | +----------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+ | {bytes: 38906} | {host: 198.194.132.41, method: GET, protocol: HTTP/1.0, referer: https://some.com/this/endpoint/prints/money, request: /observability/metrics/production, status: 400, user-identifier: shaneIxD} | 06/Oct/2023:17:53:45 | | {bytes: 44606} | {host: 140.115.224.194, method: PATCH, protocol: HTTP/1.0, referer: https://we.org/user/booperbot124, request: /booper/bopper/mooper/mopper, status: 304, user-identifier: jesseddy} | 06/Oct/2023:17:53:45 | | {bytes: 23517} | {host: 63.69.43.67, method: GET, protocol: HTTP/2.0, referer: https://random.net/booper/bopper/mooper/mopper, request: /booper/bopper/mooper/mopper, status: 550, user-identifier: jesseddy} | 06/Oct/2023:17:53:45 | | {bytes: 44876} | {host: 69.4.253.156, method: PATCH, protocol: HTTP/1.1, referer: https://some.net/booper/bopper/mooper/mopper, request: /user/booperbot124, status: 403, user-identifier: Karimmove} | 06/Oct/2023:17:53:45 | | {bytes: 34122} | {host: 239.152.196.123, method: DELETE, protocol: HTTP/2.0, referer: https://for.com/observability/metrics/production, request: /apps/deploy, status: 403, user-identifier: meln1ks} | 06/Oct/2023:17:53:45 | | {bytes: 37438} | {host: 95.243.186.123, method: DELETE, protocol: HTTP/1.1, referer: https://make.de/wp-admin, request: /wp-admin, status: 550, user-identifier: Karimmove} | 06/Oct/2023:17:53:45 | | {bytes: 45784} | {host: 66.142.251.66, method: PUT, protocol: HTTP/2.0, referer: https://some.org/apps/deploy, request: /secret-info/open-sesame, status: 403, user-identifier: benefritz} | 06/Oct/2023:17:53:45 | | {bytes: 27788} | {host: 157.85.140.215, method: GET, protocol: HTTP/1.1, referer: https://random.de/booper/bopper/mooper/mopper, request: /booper/bopper/mooper/mopper, status: 401, user-identifier: devankoshal} | 06/Oct/2023:17:53:45 | | {bytes: 5344} | {host: 62.191.179.3, method: POST, protocol: HTTP/1.0, referer: https://random.org/booper/bopper/mooper/mopper, request: /observability/metrics/production, status: 400, user-identifier: jesseddy} | 06/Oct/2023:17:53:45 | | {bytes: 9136} | {host: 237.213.221.20, method: PUT, protocol: HTTP/2.0, referer: https://some.us/this/endpoint/prints/money, request: /observability/metrics/production, status: 304, user-identifier: ahmadajmi} | 06/Oct/2023:17:53:46 | | {bytes: 5640} | {host: 38.148.115.2, method: GET, protocol: HTTP/1.0, referer: https://for.net/apps/deploy, request: /do-not-access/needs-work, status: 301, user-identifier: benefritz} | 06/Oct/2023:17:53:46 | ... ``` ########## datafusion/sqllogictest/test_files/map.slt: ########## @@ -44,6 +44,7 @@ DELETE 24 query T SELECT strings['not_found'] FROM data LIMIT 1; ---- +NULL Review Comment: Here is how it works on main: ```sql > select strings['not_found'] from 'datafusion/core/tests/data/parquet_map.parquet'; 0 row(s) fetched. Elapsed 0.006 seconds. ``` Here is how it works on this PR (aka has a single row for each input row) ```sql DataFusion CLI v37.1.0 > select strings['not_found'] from '../datafusion/core/tests/data/parquet_map.parquet'; +----------------------------------------------------------------------+ | ../datafusion/core/tests/data/parquet_map.parquet.strings[not_found] | +----------------------------------------------------------------------+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | . | | . | | . | +----------------------------------------------------------------------+ 209 row(s) fetched. (First 40 displayed. Use --maxrows to adjust) Elapsed 0.033 seconds. ``` ########## datafusion/sqllogictest/test_files/array.slt: ########## @@ -5197,8 +5199,9 @@ false false false true true false true false true false false true false true false false -false false false false -false false false false +NULL NULL false false Review Comment: Iikewise I agree this should have 7 output rows https://github.com/apache/datafusion/blob/cc53bd3a3ed66341f41dcffdcd3aa6fe1389db71/datafusion/sqllogictest/test_files/array.slt#L80-L90 ########## datafusion/core/tests/user_defined/user_defined_scalar_functions.rs: ########## @@ -205,6 +205,44 @@ impl ScalarUDFImpl for Simple0ArgsScalarUDF { } } +#[tokio::test] +async fn test_row_mismatch_error_in_scalar_udf() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(Int32Array::from(vec![1, 2]))], + )?; + + let ctx = SessionContext::new(); + + ctx.register_batch("t", batch)?; + + // udf that always return 1 row + let buggy_udf = Arc::new(|_: &[ColumnarValue]| { + Ok(ColumnarValue::Array(Arc::new(Int32Array::from(vec![0])))) + }); + + ctx.register_udf(create_udf( + "buggy_func", + vec![DataType::Int32], + Arc::new(DataType::Int32), + Volatility::Immutable, + buggy_udf, + )); + assert_contains!( + ctx.sql("select buggy_func(a) from t") + .await? + .show() + .await + .err() + .unwrap() + .to_string(), + "UDF returned a different number of rows than expected" Review Comment: 👌 -- very nice ########## datafusion/sqllogictest/test_files/array.slt: ########## @@ -5183,8 +5184,9 @@ false false false true true false true false true false false true false true false false -false false false false -false false false false +NULL NULL false false Review Comment: I double checked and the `arrays` table has 7 rows, so I agree the correct answer has 7 output rows as well https://github.com/apache/datafusion/blob/cc53bd3a3ed66341f41dcffdcd3aa6fe1389db71/datafusion/sqllogictest/test_files/array.slt#L58-L68 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org