liyongjing opened a new issue, #1886:
URL: https://github.com/apache/arrow-rs/issues/1886

   **Which part is this question about**
   ```
   use std::{fs::File, path::Path, sync::Arc};
   
   use parquet::{
       basic::Compression,
       data_type::{ByteArray, ByteArrayType, Int32Type},
       file::{
           properties::{WriterProperties, WriterVersion},
           reader::FileReader,
           serialized_reader::SerializedFileReader,
           writer::SerializedFileWriter,
       },
       record::{Row, RowAccessor},
       schema::parser::parse_message_type,
   };
   
   /// Parquet message-type definition for the sample file: one optional `INT32`
   /// column (`eventType`) and one repeated `BYTE_ARRAY` column (`category`).
   ///
   /// Parsed by `parse_message_type` when the writer is created.
   const MESSAGE_TYPE: &str = "
   message Log {
     OPTIONAL INT32 eventType;
     REPEATED BYTE_ARRAY category;
   }
   ";
   
   /// One logical record of the sample data set, in row-oriented form.
   #[derive(Debug, Clone, PartialEq)]
   pub struct Item {
       /// Value stored in the `eventType` column.
       pub event_type: i32,
       /// Values stored in the repeated `category` column for this record.
       pub category: Vec<String>,
   }
   
   /// Column-oriented view of a set of `Item`s, ready to hand to the column writers.
   #[derive(Debug, Clone, PartialEq)]
   pub struct Batch {
       /// One `eventType` value per record, in record order.
       pub event_types: Vec<i32>,
       /// The `category` values of all records concatenated in record order.
       ///
       /// Record boundaries are not stored here.
       pub categories: Vec<ByteArray>,
   }
   
   fn data() -> Batch {
       let items = vec![
           Item {
               event_type: 1,
               category: vec!["test11".to_string(), "test12".to_string()],
           },
           Item {
               event_type: 2,
               category: vec!["test21".to_string(), "test22".to_string()],
           },
       ];
       let mut b = Batch {
           event_types: vec![],
           categories: vec![],
       };
   
       for item in &items {
           b.event_types.push(item.event_type);
           for cate in &item.category {
               b.categories.push(ByteArray::from(cate.as_str()));
           }
       }
       b
   }
   
   fn write() {
       let path = Path::new("sample.parquet");
       let file = File::create(&path).unwrap();
       let schema = Arc::new(parse_message_type(MESSAGE_TYPE).unwrap());
   
       let props = Arc::new(
           WriterProperties::builder()
               .set_compression(Compression::SNAPPY)
               .set_writer_version(WriterVersion::PARQUET_2_0)
               .build(),
       );
   
       let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
       let mut row_group_writer = writer.next_row_group().unwrap();
   
       let batch = data();
       // column 0
       let mut col_writer = row_group_writer
           .next_column()
           .expect("next column")
           .unwrap();
       col_writer
           .typed::<Int32Type>()
           .write_batch(&batch.event_types, None, None)
           .expect("writing column");
       col_writer.close().expect("close column");
       // column 1 how write REPEATED?
       let mut col_writer = row_group_writer
           .next_column()
           .expect("next column")
           .unwrap();
       col_writer
           .typed::<ByteArrayType>()
           .write_batch(&batch.categories, None, None)
           .expect("writing column");
       col_writer.close().expect("close column");
   
       let rg_md = row_group_writer.close().expect("close row group");
       println!("total rows written: {}", rg_md.num_rows());
   
       writer.close().unwrap();
   }
   
   /// Reads `sample.parquet` back row by row and prints every record.
   ///
   /// For each row the `category` column (index 1) is printed via
   /// `read_category`, followed by the `eventType` value (index 0).
   ///
   /// # Panics
   ///
   /// Panics if the file cannot be opened or parsed, if the row iterator cannot
   /// be created, or if column 0 of a row cannot be read as an `INT32`.
   fn read() {
       let path = Path::new("sample.parquet");
       let file = File::open(path).expect("Unable to open file");
       // The message is a single line; a hard-wrapped literal would embed a newline.
       let reader = SerializedFileReader::new(file).expect("Unable to read file");

       let iter = reader.get_row_iter(None).expect("get iterator");
       for record in iter {
           let event_type = record.get_int(0).unwrap();
           read_category(&record, 1);
           println!("event_type{}", event_type);
       }
   }
   
   // public static List<String> getCategory(Group value) {
   //     List<String> categoryList = new ArrayList<>();
   //     try {
   //       int count = value.getFieldRepetitionCount("category");
   //       if (count > 0) {
   //         int index = 0;
   //         while (index < count) {
   //           categoryList.add(value.getString("category", index++).trim());
   //         }
   //       }
   //     } catch (Exception e) {
   //     }
   //     return categoryList;
   //   }
   fn read_category(record: &Row, i: usize) {
       // where is getFieldRepetitionCount
       match record.get_bytes(i) {
           Ok(v) => println!("{:?}", v.as_utf8()),
           Err(_) => {}
       };
   }
   ```
   
   **Describe your question**
   How do I read and write the repeated `category` column with the Rust parquet crate?
   
   **Additional context**
   Add any other context about the problem here.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to