Dandandan commented on a change in pull request #793: URL: https://github.com/apache/arrow-datafusion/pull/793#discussion_r679380782
########## File path: datafusion/tests/sql.rs ########## @@ -3014,6 +3014,109 @@ async fn query_count_distinct() -> Result<()> { Ok(()) } +#[tokio::test] +async fn query_group_on_null() -> Result<()> { + let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)])); + + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(Int32Array::from(vec![ + Some(0), + Some(3), + None, + Some(1), + Some(3), + ]))], + )?; + + let table = MemTable::try_new(schema, vec![vec![data]])?; + + let mut ctx = ExecutionContext::new(); + ctx.register_table("test", Arc::new(table))?; + let sql = "SELECT COUNT(*), c1 FROM test GROUP BY c1"; + + let actual = execute_to_batches(&mut ctx, sql).await; + + // Note that the results also + // include a row for NULL (c1=NULL, count = 1) + let expected = vec![ + "+-----------------+----+", + "| COUNT(UInt8(1)) | c1 |", + "+-----------------+----+", + "| 1 | |", + "| 1 | 0 |", + "| 1 | 1 |", + "| 2 | 3 |", + "+-----------------+----+", + ]; + assert_batches_sorted_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn query_group_on_null_multi_col() -> Result<()> { + let schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Utf8, true), + ])); + + let data = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![ + Some(0), + Some(0), + Some(3), + None, + None, + Some(3), + Some(0), + None, + Some(3), + ])), + Arc::new(StringArray::from(vec![ + None, + None, + Some("foo"), + None, + Some("bar"), + Some("foo"), + None, + Some("bar"), + Some("foo"), + ])), + ], + )?; + + let table = MemTable::try_new(schema, vec![vec![data]])?; + + let mut ctx = ExecutionContext::new(); + ctx.register_table("test", Arc::new(table))?; + let sql = "SELECT COUNT(*), c1, c2 FROM test GROUP BY c1, c2"; + + let actual = execute_to_batches(&mut ctx, sql).await; + + // Note that the results also include values for null + // include a row for NULL (c1=NULL, count = 1) + let expected = vec![ + "+-----------------+----+-----+", + "| COUNT(UInt8(1)) | c1 | c2 |", + "+-----------------+----+-----+", + "| 1 | | |", + "| 2 | | bar |", + "| 3 | 0 | |", Review comment: 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org