tjwilson90 opened a new issue, #2025: URL: https://github.com/apache/arrow-rs/issues/2025
This appears very similar to https://issues.apache.org/jira/browse/ARROW-9790, but specifically for list columns.

```rust
use arrow::array::{Int32Builder, ListBuilder};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use parquet::arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader};
use parquet::file::properties::WriterProperties;
use parquet::file::reader::SerializedFileReader;
use std::error::Error;
use std::sync::Arc;
use tempfile::NamedTempFile;

fn main() -> Result<(), Box<dyn Error>> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("int", DataType::Int32, false),
        Field::new(
            "list",
            DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
            false,
        ),
    ]));
    let temp_file = NamedTempFile::new()?;
    let mut writer = ArrowWriter::try_new(
        temp_file.reopen()?,
        schema.clone(),
        Some(
            WriterProperties::builder()
                .set_max_row_group_size(8)
                .build(),
        ),
    )?;
    for _ in 0..2 {
        let mut int_builder = Int32Builder::new(10);
        let mut list_builder = ListBuilder::new(Int32Builder::new(10));
        for i in 0..10 {
            int_builder.append_value(i)?;
            list_builder.append(true)?;
        }
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(int_builder.finish()),
                Arc::new(list_builder.finish()),
            ],
        )?;
        writer.write(&batch)?;
    }
    writer.close()?;

    let file_reader = Arc::new(SerializedFileReader::new(temp_file.reopen()?)?);
    let mut file_reader = ParquetFileArrowReader::new(file_reader);
    let mut record_reader = file_reader.get_record_reader(8)?;
    assert_eq!(8, record_reader.next().unwrap()?.num_rows());
    assert_eq!(8, record_reader.next().unwrap()?.num_rows());
    assert_eq!(4, record_reader.next().unwrap()?.num_rows());
    Ok(())
}
```

This fails with `Error: ParquetError("Parquet error: Not all children array length are the same!")`

--
This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
