kosiew commented on issue #18989: URL: https://github.com/apache/datafusion/issues/18989#issuecomment-3632411404
Hi @Ke-Wng, I'm trying to narrow down whether the panic only happens when there are no rows. I amended the reproducer to include some rows and ran it on main at https://github.com/apache/datafusion/commit/83736efc4ad8865019b0809ac9d87e63eabbe0a8: ``` use std::sync::Arc; use datafusion::arrow::array::{Float64Array, Int64Array, StringArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::datasource::MemTable; use datafusion::functions_aggregate::count::count_udaf; use datafusion::logical_expr::col; use datafusion::prelude::*; #[tokio::main] async fn main() { let ctx = SessionContext::default(); let schema = Arc::new(Schema::new(vec![ Field::new("ts", DataType::Int64, false), Field::new("region", DataType::Utf8, false), Field::new("value", DataType::Float64, false), ])); // create a multi-partitioned MemTable with sample data // partition 1: us-west region let partition1 = RecordBatch::try_new( schema.clone(), vec![ Arc::new(Int64Array::from(vec![1000, 1000, 2000, 2000])), Arc::new(StringArray::from(vec![ "us-west", "us-west", "us-west", "us-west", ])), Arc::new(Float64Array::from(vec![10.5, 20.3, 15.2, 25.8])), ], ) .expect("Failed to create partition 1"); // partition 2: eu-east region let partition2 = RecordBatch::try_new( schema.clone(), vec![ Arc::new(Int64Array::from(vec![1000, 1000, 2000])), Arc::new(StringArray::from(vec!["eu-east", "eu-east", "eu-east"])), Arc::new(Float64Array::from(vec![30.1, 40.2, 35.5])), ], ) .expect("Failed to create partition 2"); let mem_table = MemTable::try_new(schema.clone(), vec![vec![partition1], vec![partition2]]) .expect("Failed to create MemTable"); ctx.register_table("metrics", Arc::new(mem_table)) .expect("Failed to register table"); // aggregate and sort twice - this pattern previously caused a panic let data_frame = ctx .table("metrics") .await .expect("Failed to get table") .aggregate( vec![col("region"), col("ts")], vec![count_udaf().call(vec![col("value")])], ) 
.expect("Failed first aggregate") .sort(vec![ col("region").sort(true, true), col("ts").sort(true, true), ]) .expect("Failed first sort") .aggregate( vec![col("ts")], vec![count_udaf().call(vec![col("count(metrics.value)")])], ) .expect("Failed second aggregate") .sort(vec![col("ts").sort(true, true)]) .expect("Failed second sort"); println!( "Logical Plan:\n{}", data_frame.logical_plan().display_indent() ); println!("\nExecuting query (should not panic)..."); data_frame.show().await.expect("Failed to execute query"); println!("\n✅ Success! The query executed without panicking."); } ``` and it ran successfully: ``` Logical Plan: Sort: metrics.ts ASC NULLS FIRST Aggregate: groupBy=[[metrics.ts]], aggr=[[count(count(metrics.value))]] Sort: metrics.region ASC NULLS FIRST, metrics.ts ASC NULLS FIRST Aggregate: groupBy=[[metrics.region, metrics.ts]], aggr=[[count(metrics.value)]] TableScan: metrics Executing query (should not panic)... +------+-----------------------------+ | ts | count(count(metrics.value)) | +------+-----------------------------+ | 1000 | 2 | | 2000 | 2 | +------+-----------------------------+ ✅ Success! The query executed without panicking. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
