efredine commented on code in PR #11289: URL: https://github.com/apache/datafusion/pull/11289#discussion_r1676555493
########## datafusion/core/tests/parquet/arrow_statistics.rs: ########## @@ -1984,7 +1981,96 @@ async fn test_struct() { } .run(); } +// test nested struct +#[tokio::test] +async fn test_nested_struct() { + // This creates a parquet file with 1 column named "nested_struct" + // The file is created by 1 record batch with 3 rows in the nested struct array + let reader = TestReader { + scenario: Scenario::StructArrayNested, + row_per_group: 5, + } + .build() + .await; + // Expected minimum and maximum values for nested struct fields + let inner_min = StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![Some(false)])) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![Some(42)])) as ArrayRef, + ), + ]); + let inner_max = StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![Some(true)])) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![Some(44)])) as ArrayRef, + ), + ]); + + let inner_fields = Fields::from(vec![ + Field::new("b", DataType::Boolean, false), + Field::new("c", DataType::Int32, false), + ]); + + // Expected minimum outer struct + let expected_min_outer = StructArray::from(vec![ + ( + Arc::new(Field::new( + "inner_struct", + DataType::Struct(inner_fields.clone()), + false, + )), + Arc::new(inner_min) as ArrayRef, + ), + ( + Arc::new(Field::new("outer_float", DataType::Float64, false)), + Arc::new(Float64Array::from(vec![Some(5.0)])) as ArrayRef, + ), + ( + Arc::new(Field::new("outer_boolean", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![Some(false)])) as ArrayRef, + ), + ]); + + // Expected maximum outer struct + let expected_max_outer = StructArray::from(vec![ + ( + Arc::new(Field::new( + "inner_struct", + DataType::Struct(inner_fields), + false, + )), + Arc::new(inner_max) as ArrayRef, + ), + ( + 
Arc::new(Field::new("outer_float", DataType::Float64, false)), + Arc::new(Float64Array::from(vec![Some(7.0)])) as ArrayRef, + ), + ( + Arc::new(Field::new("outer_boolean", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![Some(true)])) as ArrayRef, + ), + ]); + Review Comment: I don't _think_ you need to do anything differently. I was trying to find some tests in the arrow crate for when the statistics are written, but I haven't been able to find any for writing nested structs. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org