alamb opened a new issue #139: URL: https://github.com/apache/arrow-rs/issues/139
*Note*: migrated from original JIRA: https://issues.apache.org/jira/browse/ARROW-10920 I stumbled across this by chance. I am not too surprised that this fails but I would expect it to fail gracefully and not with a segmentation fault. {code:java} use std::fs::File; use std::sync::Arc; use arrow::array::StringBuilder; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::Result; use arrow::record_batch::RecordBatch; use parquet::arrow::ArrowWriter; fn main() -> Result<()> { let schema = Schema::new(vec![ Field::new("c0", DataType::Utf8, false), Field::new("c1", DataType::Utf8, true), ]); let batch_size = 2500000; let repeat_count = 140; let file = File::create("/tmp/test.parquet")?; let mut writer = ArrowWriter::try_new(file, Arc::new(schema.clone()), None).unwrap(); let mut c0_builder = StringBuilder::new(batch_size); let mut c1_builder = StringBuilder::new(batch_size); println!("Start of loop"); for i in 0..batch_size { let c0_value = format!("{:032}", i); let c1_value = c0_value.repeat(repeat_count); c0_builder.append_value(&c0_value)?; c1_builder.append_value(&c1_value)?; } println!("Finish building c0"); let c0 = Arc::new(c0_builder.finish()); println!("Finish building c1"); let c1 = Arc::new(c1_builder.finish()); println!("Creating RecordBatch"); let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![c0, c1])?; // write the batch to parquet println!("Writing RecordBatch"); writer.write(&batch).unwrap(); println!("Closing writer"); writer.close().unwrap(); Ok(()) } {code} output: {code:java} Start of loop Finish building c0 Finish building c1 Creating RecordBatch Writing RecordBatch Segmentation fault (core dumped) {code} -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org
