jecsand838 commented on code in PR #8298:
URL: https://github.com/apache/arrow-rs/pull/8298#discussion_r2337976557
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -763,4 +1179,100 @@ mod tests {
let got = encode_all(&arr, &FieldPlan::Scalar, None);
assert_bytes_eq(&got, &expected);
}
+
+ #[test]
Review Comment:
@nathaniel-d-ef You could probably do something like this for the end-to-end
tests in `arrow-avro/src/writer/mod.rs`:
```rust
/// Round-trips `avro/nonnullable.impala.avro`: reads it, writes it back out
/// with `AvroWriter` into a temporary file, re-reads that file, and asserts
/// that the re-read batch equals the original batch.
#[test]
fn test_nonnullable_impala_roundtrip_writer() -> Result<(), ArrowError> {
    // Load source Avro with Map fields
    let path = arrow_test_data("avro/nonnullable.impala.avro");
    let rdr_file = File::open(&path).expect("open avro/nonnullable.impala.avro");
    let mut reader = ReaderBuilder::new()
        .build(BufReader::new(rdr_file))
        .expect("build reader for nonnullable.impala.avro");

    // Collect all input batches and concatenate to a single RecordBatch.
    // The schema is captured first because `collect` consumes the reader.
    let in_schema = reader.schema();
    let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
    let original =
        arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");

    // Write out using the OCF writer into a temporary file
    let tmp = NamedTempFile::new().expect("create temp file");
    let out_path = tmp.path();
    let out_file = File::create(out_path).expect("create temp avro file");
    let mut writer = AvroWriter::new(out_file, in_schema.as_ref().clone())?;
    writer.write(&original)?;
    writer.finish()?;

    // Read the produced file back with the Reader
    let f_rt = File::open(out_path).expect("open roundtrip avro");
    let mut rt_reader = ReaderBuilder::new()
        .build(BufReader::new(f_rt))
        .expect("build reader for round-tripped OCF");
    let rt_schema = rt_reader.schema();
    let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
    let roundtrip =
        arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");

    // Exact value fidelity (schema + data)
    assert_eq!(
        roundtrip, original,
        "Round-trip Avro map data mismatch for nonnullable.impala.avro"
    );
    Ok(())
}
/// Round-trips each decimal fixture: reads it, writes it back out with
/// `AvroWriter` into a temporary file, re-reads that file, and asserts that
/// the re-read batch equals the original batch.
#[test]
fn test_roundtrip_decimals_via_writer() -> Result<(), ArrowError> {
    // Each entry is (relative path, in_test_data_dir). When the flag is `true`
    // the path is resolved with `arrow_test_data`; otherwise it is joined onto
    // this crate's `CARGO_MANIFEST_DIR`.
    let files: [(&str, bool); 8] = [
        // fixed-backed -> Decimal128(25,2)
        ("avro/fixed_length_decimal.avro", true),
        // legacy fixed[8] -> Decimal64(13,2)
        ("avro/fixed_length_decimal_legacy.avro", true),
        // bytes-backed -> Decimal32(4,2)
        ("avro/int32_decimal.avro", true),
        // bytes-backed -> Decimal64(10,2)
        ("avro/int64_decimal.avro", true),
        // bytes-backed -> Decimal256(76,2)
        ("test/data/int256_decimal.avro", false),
        // fixed[32]-backed -> Decimal256(76,10)
        ("test/data/fixed256_decimal.avro", false),
        // legacy fixed[4] -> Decimal32(9,2)
        ("test/data/fixed_length_decimal_legacy_32.avro", false),
        // bytes-backed -> Decimal128(38,2)
        ("test/data/int128_decimal.avro", false),
    ];
    for (rel, in_test_data_dir) in files {
        // Resolve path the same way as reader::test_decimal
        let path: String = if in_test_data_dir {
            arrow_test_data(rel)
        } else {
            PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                .join(rel)
                .to_string_lossy()
                .into_owned()
        };

        // Read original file into a single RecordBatch for comparison.
        // The schema is captured first because `collect` consumes the reader.
        let f_in = File::open(&path).expect("open input avro");
        let mut rdr = ReaderBuilder::new().build(BufReader::new(f_in))?;
        let in_schema = rdr.schema();
        let in_batches = rdr.collect::<Result<Vec<_>, _>>()?;
        let original =
            arrow::compute::concat_batches(&in_schema, &in_batches).expect("concat input");

        // Write it out with the OCF writer (no special compression)
        let tmp = NamedTempFile::new().expect("create temp file");
        let out_path = tmp.into_temp_path();
        let out_file = File::create(&out_path).expect("create temp avro");
        let mut writer = AvroWriter::new(out_file, original.schema().as_ref().clone())?;
        writer.write(&original)?;
        writer.finish()?;

        // Read back the file we just wrote and compare equality (schema + data)
        let f_rt = File::open(&out_path).expect("open roundtrip avro");
        let mut rt_rdr = ReaderBuilder::new().build(BufReader::new(f_rt))?;
        let rt_schema = rt_rdr.schema();
        let rt_batches = rt_rdr.collect::<Result<Vec<_>, _>>()?;
        let roundtrip =
            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat rt");
        assert_eq!(
            roundtrip, original,
            "decimal round-trip mismatch for {rel}"
        );
    }
    Ok(())
}
```
EDIT: Just be sure to add the `small_decimals` feature flag into
`test_roundtrip_decimals_via_writer` and to add a third test using the
`arrow-avro/test/data/duration_uuid.avro` file.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]