alamb opened a new issue #139:
URL: https://github.com/apache/arrow-rs/issues/139


   *Note*: migrated from original JIRA: 
https://issues.apache.org/jira/browse/ARROW-10920
   
   I stumbled across this by chance. I am not too surprised that this fails, but 
I would expect it to fail gracefully and not with a segmentation fault.
   
    
   {code:java}
    use std::fs::File;
   use std::sync::Arc;
   
   use arrow::array::StringBuilder;
   use arrow::datatypes::{DataType, Field, Schema};
   use arrow::error::Result;
   use arrow::record_batch::RecordBatch;
   
   use parquet::arrow::ArrowWriter;
   
   // Reproducer: builds one RecordBatch holding two Utf8 columns of
   // 2,500,000 rows each and writes it to /tmp/test.parquet with ArrowWriter.
   // Per the reported output, the last progress line printed is
   // "Writing RecordBatch", after which the process dies with a segfault.
   fn main() -> Result<()> {
       let schema = Schema::new(vec![
           Field::new("c0", DataType::Utf8, false),
           Field::new("c1", DataType::Utf8, true),
       ]);
       // Number of rows in the single batch; also passed to both builders.
       let batch_size = 2500000;
       // How many times each c0 value is repeated to form the c1 value.
       let repeat_count = 140;
       let file = File::create("/tmp/test.parquet")?;
       let mut writer = ArrowWriter::try_new(file, Arc::new(schema.clone()), 
None).unwrap();
       let mut c0_builder = StringBuilder::new(batch_size);
       let mut c1_builder = StringBuilder::new(batch_size);
   
       println!("Start of loop");
       for i in 0..batch_size {
           // `{:032}` zero-pads the row index to width 32; since i < 2,500,000
           // every c0 value is exactly 32 ASCII characters.
           let c0_value = format!("{:032}", i);
           // 32 * 140 = 4480 bytes per c1 value, i.e. about 11.2 GB of string
           // data in c1 across all 2,500,000 rows.
           // NOTE(review): presumably this is far beyond what the 32-bit
           // offsets of DataType::Utf8 can address -- confirm whether that is
           // the cause of the crash.
           let c1_value = c0_value.repeat(repeat_count);
           c0_builder.append_value(&c0_value)?;
           c1_builder.append_value(&c1_value)?;
       }
   
       println!("Finish building c0");
       let c0 = Arc::new(c0_builder.finish());
   
       println!("Finish building c1");
       let c1 = Arc::new(c1_builder.finish());
   
       println!("Creating RecordBatch");
       let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![c0, 
c1])?;
   
       // write the batch to parquet
       // (this is the step during which the reported segfault occurs: the
       // output shows "Writing RecordBatch" but never "Closing writer")
       println!("Writing RecordBatch");
       writer.write(&batch).unwrap();
   
       println!("Closing writer");
       writer.close().unwrap();
   
       Ok(())
   }
   {code}
   output:
   {code:java}
   Start of loop
   Finish building c0
   Finish building c1
   Creating RecordBatch
   Writing RecordBatch
   Segmentation fault (core dumped)
    {code}
    


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to