Rich-T-kid commented on code in PR #10064:
URL: https://github.com/apache/arrow-rs/pull/10064#discussion_r3353881667
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -5193,4 +5193,159 @@ mod tests {
let cc = file_meta.row_group(0).column(0);
assert!(cc.column_index_range().is_none());
}
+
+ /// Serializes a single Arrow array into an in-memory Parquet file.
+ ///
+ /// The array is written as the only column of a single record batch. The
+ /// column is a nullable field named "col" whose data type is taken from
+ /// `array.data_type()`. Returns the finished file contents as `Bytes`.
+ ///
+ /// Panics if the writer or the record batch cannot be created, or if
+ /// writing or closing the writer fails.
+ fn write_column_to_bytes(array: ArrayRef) -> Bytes {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "col",
+ array.data_type().clone(),
+ true,
+ )]));
+ let mut buf = Vec::new();
+ // `None` is passed for the writer's third argument (presumably the
+ // optional writer properties, so defaults apply -- confirm against the API).
+ let mut writer =
+ ArrowWriter::try_new(&mut buf, schema.clone(),
None).expect("create writer");
+ writer
+ .write(&RecordBatch::try_new(schema, vec![array]).unwrap())
+ .unwrap();
+ // Close the writer before the buffer is handed out as `Bytes`.
+ writer.close().unwrap();
+ Bytes::from(buf)
+ }
+
+ /// Reads column 0 of the first record batch from the Parquet `bytes`,
+ /// supplying `schema` to the reader via `ArrowReaderOptions::with_schema`.
+ ///
+ /// Only the first batch yielded by the reader is inspected; any later
+ /// batches are ignored.
+ ///
+ /// Panics if the reader cannot be built, if it yields no batch, or if the
+ /// first batch is an error.
+ fn read_column_with_schema(bytes: Bytes, schema: SchemaRef) -> ArrayRef {
+ let opts =
crate::arrow::arrow_reader::ArrowReaderOptions::new().with_schema(schema);
+ ParquetRecordBatchReaderBuilder::try_new_with_options(bytes, opts)
+ .unwrap()
+ .build()
+ .unwrap()
+ // First `unwrap`: a batch exists; second `unwrap`: it decoded successfully.
+ .next()
+ .unwrap()
+ .unwrap()
+ .column(0)
+ .clone()
+ }
+
+ /// Writes `ree` and `flat` to separate Parquet buffers and asserts:
+ /// 1. Reading the `ree` bytes back (with the flat schema) produces values
+ ///    equal to `flat`.
+ /// 2. Reading the `ree` bytes back equals reading the `flat` bytes back
+ ///    (same output).
+ ///
+ /// The flat schema is a single nullable field named "col" whose data type
+ /// is taken from `flat.data_type()`; both buffers are read with it.
+ fn ree_write_read_roundtrip(ree: ArrayRef, flat: ArrayRef) {
+ let flat_schema = Arc::new(Schema::new(vec![Field::new(
+ "col",
+ flat.data_type().clone(),
+ true,
+ )]));
+ let ree_bytes = write_column_to_bytes(ree);
+ let flat_bytes = write_column_to_bytes(flat.clone());
+
+ let from_ree = read_column_with_schema(ree_bytes, flat_schema.clone());
+ let from_flat = read_column_with_schema(flat_bytes, flat_schema);
+
+ // Assertion 1: the decoded REE column matches the in-memory flat array.
+ assert_eq!(from_ree.as_ref(), flat.as_ref());
+ // Assertion 2: the decoded REE column matches the decoded flat column.
+ assert_eq!(from_ree.as_ref(), from_flat.as_ref());
+ }
+
+ #[test]
+ fn ree_string_column_required() {
Review Comment:
Individual unit tests need to take up less space; these tests can be
simplified into `input_ree_arrayRef`, `expected_output_arrayRef`, `test_type`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]