liyongjing opened a new issue, #1915:
URL: https://github.com/apache/arrow-rs/issues/1915

   **Describe the bug**
   ```rust
   use std::{fs::File, path::Path, sync::Arc};
   
   use arrow::record_batch::RecordBatchReader;
   use parquet::{
       arrow::{ArrowReader, ParquetFileArrowReader, ProjectionMask},
       basic::Compression,
       data_type::Int32Type,
       file::{
           properties::{WriterProperties, WriterVersion},
           writer::SerializedFileWriter,
       },
       schema::parser::parse_message_type,
   };
   
   // Parquet message schema used by this reproduction. It declares three leaf
   // columns: an optional INT32 (`eventType`), a repeated INT32 (`category`),
   // and an optional INT32 (`error`) nested inside the repeated group `filter`.
   const MESSAGE_TYPE: &'static str = "
   message Log {
     OPTIONAL INT32 eventType;
     REPEATED INT32 category;
     REPEATED group filter {
       OPTIONAL INT32 error;
     }
   }
   ";
   
   /// Entry point of the reproduction: first writes `sample.parquet`, then
   /// reads it back. The order matters because `read` opens the file that
   /// `write` creates.
   fn main() {
       write();
       read();
   }
   
   /// Writes `sample.parquet` with a single row group using the schema in
   /// `MESSAGE_TYPE`, one `write_batch` call per leaf column.
   ///
   /// Each `write_batch` call passes the values, then the optional definition
   /// levels, then the optional repetition levels.
   ///
   /// # Panics
   ///
   /// Panics if the file cannot be created, the schema cannot be parsed, or any
   /// writer step (including closing a column, the row group, or the file) fails.
   fn write() {
       let path = Path::new("sample.parquet");
       let file = File::create(&path).unwrap();
       let schema = Arc::new(parse_message_type(MESSAGE_TYPE).unwrap());
   
       let props = Arc::new(
           WriterProperties::builder()
               .set_compression(Compression::SNAPPY)
               .set_writer_version(WriterVersion::PARQUET_2_0)
               .build(),
       );
       let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
       let mut row_group_writer = writer.next_row_group().unwrap();
   
       // column 0 (`eventType`): no repetition levels are passed
       let mut col_writer = row_group_writer
           .next_column()
           .expect("next column")
           .unwrap();
       col_writer
           .typed::<Int32Type>()
           .write_batch(&[1], Some(&[1]), None)
           .expect("writing column");
       col_writer.close().expect("close column");
       // column 1 (`category`): both definition and repetition levels are passed
       let mut col_writer = row_group_writer
           .next_column()
           .expect("next column")
           .unwrap();
       col_writer
           .typed::<Int32Type>()
           .write_batch(&[1, 1], Some(&[0, 1]), Some(&[0, 1]))
           .expect("writing column");
       col_writer.close().expect("close column");
       // column 2 (`filter.error`)
       let mut col_writer = row_group_writer
           .next_column()
           .expect("next column")
           .unwrap();
       col_writer
           .typed::<Int32Type>()
           .write_batch(&[1], Some(&[1]), Some(&[1]))
           .expect("writing column");
       // Check the result here as well: the other two columns already do, and
       // an unchecked failure would leave a broken file without any diagnostic.
       col_writer.close().expect("close column");
   
       let rg_md = row_group_writer.close().expect("close row group");
       println!("total rows written: {}", rg_md.num_rows());
   
       writer.close().unwrap();
   }
   
   /// Opens `sample.parquet`, prints the full and the projected Arrow schema,
   /// then builds an unprojected reader and a reader projected onto leaf
   /// column 0, and iterates the batches of the projected reader.
   ///
   /// Every fallible step is unwrapped, so any reader error panics; this is
   /// how the reported failure surfaces.
   fn read() {
       let file = File::open("sample.parquet").unwrap();
   
       let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap();
       // Select only the leaf column with index 0 of the Parquet schema.
       let mask = ProjectionMask::leaves(arrow_reader.parquet_schema(), [0]);
   
       println!(
           "Converted arrow schema is: {}",
           arrow_reader.get_schema().unwrap()
       );
       println!(
           "Arrow schema after projection is: {}",
           arrow_reader.get_schema_by_columns(mask.clone()).unwrap()
       ); // error incompatible arrow schema, expected struct got List
   
       // Reader over all columns; only its schema is printed, no batches are read.
       // NOTE(review): 2048 is presumably the batch size — confirm against the API.
       let mut unprojected = arrow_reader.get_record_reader(2048).unwrap();
       println!("Unprojected reader schema: {}", unprojected.schema());
   
       // Reader restricted to the projection built above.
       let mut record_batch_reader = arrow_reader
           .get_record_reader_by_columns(mask, 2048)
           .unwrap();
   
       // Report the row count of each non-empty batch; an empty batch is
       // reported as the end of the file.
       for maybe_record_batch in record_batch_reader {
           let record_batch = maybe_record_batch.unwrap();
           if record_batch.num_rows() > 0 {
               println!("Read {} records.", record_batch.num_rows());
           } else {
               println!("End of file!");
           }
       }
   }
   
   ```
   
   **To Reproduce**
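   Steps to reproduce the behavior: build and run the program above. It writes `sample.parquet` and then reads it back with a projection on leaf column 0.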
   
   **Expected behavior**
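   Reading the file with a projection on leaf column 0 (`eventType`) should succeed and return the record batches, instead of failing with an `incompatible arrow schema` error.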
   
   **Additional context**
   
https://docs.rs/parquet/16.0.0/parquet/arrow/index.html#example-of-reading-parquet-file-into-arrow-record-batch
   Running the program above fails with the following error:
   Arrow schema after projection is: Field { name: "eventType", data_type: 
Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }
   thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: 
ArrowError("incompatible arrow schema, expected struct got List(Field { name: 
\"filter\", data_type: Struct([Field { name: \"error\", data_type: Int32, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }]), 
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None })")', 
example/test.rs:99:64
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to