jecsand838 commented on code in PR #8298:
URL: https://github.com/apache/arrow-rs/pull/8298#discussion_r2337976557


##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -763,4 +1179,100 @@ mod tests {
         let got = encode_all(&arr, &FieldPlan::Scalar, None);
         assert_bytes_eq(&got, &expected);
     }
+
+    #[test]

Review Comment:
   @nathaniel-d-ef You could probably do something like this for the end-to-end tests in `arrow-avro/src/writer/mod.rs`:
   
   ```rust
       #[test]
       fn test_nonnullable_impala_roundtrip_writer() -> Result<(), ArrowError> {
           // Load source Avro with Map fields
           let path = arrow_test_data("avro/nonnullable.impala.avro");
            let rdr_file = File::open(&path).expect("open avro/nonnullable.impala.avro");
           let mut reader = ReaderBuilder::new()
               .build(BufReader::new(rdr_file))
               .expect("build reader for nonnullable.impala.avro");
           // Collect all input batches and concatenate to a single RecordBatch
           let in_schema = reader.schema();
           // Sanity: ensure the file actually contains at least one Map field
           let has_map = in_schema
               .fields()
               .iter()
               .any(|f| matches!(f.data_type(), DataType::Map(_, _)));
           assert!(
               has_map,
               "expected at least one Map field in avro/nonnullable.impala.avro"
           );
   
           let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
            let original =
                arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
           // Write out using the OCF writer into an in-memory Vec<u8>
           let buffer = Vec::<u8>::new();
            let mut writer = AvroWriter::new(buffer, in_schema.as_ref().clone())?;
           writer.write(&original)?;
           writer.finish()?;
           let out_bytes = writer.into_inner();
           // Read the produced bytes back with the Reader
           let mut rt_reader = ReaderBuilder::new()
               .build(Cursor::new(out_bytes))
               .expect("build reader for round-tripped in-memory OCF");
           let rt_schema = rt_reader.schema();
           let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
            let roundtrip =
                arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
           // Exact value fidelity (schema + data)
           assert_eq!(
               roundtrip, original,
               "Round-trip Avro map data mismatch for nonnullable.impala.avro"
           );
           Ok(())
       }
   
       #[test]
       fn test_roundtrip_decimals_via_writer() -> Result<(), ArrowError> {
           // (file, resolve via ARROW_TEST_DATA?)
           let files: [(&str, bool); 8] = [
               ("avro/fixed_length_decimal.avro", true), // fixed-backed -> 
Decimal128(25,2)
               ("avro/fixed_length_decimal_legacy.avro", true), // legacy 
fixed[8] -> Decimal64(13,2)
               ("avro/int32_decimal.avro", true),        // bytes-backed -> 
Decimal32(4,2)
               ("avro/int64_decimal.avro", true),        // bytes-backed -> 
Decimal64(10,2)
               ("test/data/int256_decimal.avro", false), // bytes-backed -> 
Decimal256(76,2)
               ("test/data/fixed256_decimal.avro", false), // fixed[32]-backed 
-> Decimal256(76,10)
               ("test/data/fixed_length_decimal_legacy_32.avro", false), // 
legacy fixed[4] -> Decimal32(9,2)
               ("test/data/int128_decimal.avro", false), // bytes-backed -> 
Decimal128(38,2)
           ];
           for (rel, in_test_data_dir) in files {
               // Resolve path the same way as reader::test_decimal
               let path: String = if in_test_data_dir {
                   arrow_test_data(rel)
               } else {
                   PathBuf::from(env!("CARGO_MANIFEST_DIR"))
                       .join(rel)
                       .to_string_lossy()
                       .into_owned()
               };
               // Read original file into a single RecordBatch for comparison
               let f_in = File::open(&path).expect("open input avro");
               let mut rdr = ReaderBuilder::new().build(BufReader::new(f_in))?;
               let in_schema = rdr.schema();
               let in_batches = rdr.collect::<Result<Vec<_>, _>>()?;
                let original =
                    arrow::compute::concat_batches(&in_schema, &in_batches).expect("concat input");
               // Write it out with the OCF writer (no special compression)
               let tmp = NamedTempFile::new().expect("create temp file");
               let out_path = tmp.into_temp_path();
                let out_file = File::create(&out_path).expect("create temp avro");
                let mut writer = AvroWriter::new(out_file, original.schema().as_ref().clone())?;
               writer.write(&original)?;
               writer.finish()?;
                // Read back the file we just wrote and compare equality (schema + data)
               let f_rt = File::open(&out_path).expect("open roundtrip avro");
                let mut rt_rdr = ReaderBuilder::new().build(BufReader::new(f_rt))?;
               let rt_schema = rt_rdr.schema();
               let rt_batches = rt_rdr.collect::<Result<Vec<_>, _>>()?;
                let roundtrip =
                    arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat rt");
                assert_eq!(roundtrip, original, "decimal round-trip mismatch for {rel}");
           }
           Ok(())
       }
   
       #[test]
       fn test_enum_roundtrip_uses_reader_fixture() -> Result<(), ArrowError> {
           // Read the known-good enum file (same as reader::test_simple)
           let path = arrow_test_data("avro/simple_enum.avro");
            let rdr_file = File::open(&path).expect("open avro/simple_enum.avro");
           let mut reader = ReaderBuilder::new()
               .build(BufReader::new(rdr_file))
               .expect("build reader for simple_enum.avro");
            // Concatenate all batches to one RecordBatch for a clean equality check
           let in_schema = reader.schema();
           let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
            let original =
                arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
           // Sanity: expect at least one Dictionary(Int32, Utf8) column (enum)
           let has_enum_dict = in_schema.fields().iter().any(|f| {
               matches!(
                   f.data_type(),
                    DataType::Dictionary(k, v) if **k == DataType::Int32 && **v == DataType::Utf8
               )
           });
           assert!(
               has_enum_dict,
               "Expected at least one enum-mapped Dictionary<Int32, Utf8> field"
           );
            // Write with OCF writer into memory using the reader-provided Arrow schema.
            // The writer will embed the Avro JSON from `avro.schema` metadata if present.
           let buffer: Vec<u8> = Vec::new();
            let mut writer = AvroWriter::new(buffer, in_schema.as_ref().clone())?;
           writer.write(&original)?;
           writer.finish()?;
           let bytes = writer.into_inner();
           // Read back and compare for exact equality (schema + data)
           let mut rt_reader = ReaderBuilder::new()
               .build(Cursor::new(bytes))
               .expect("reader for round-trip");
           let rt_schema = rt_reader.schema();
           let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
            let roundtrip =
                arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
           assert_eq!(roundtrip, original, "Avro enum round-trip mismatch");
           Ok(())
       }
   ```
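
   For reference, the sketches above assume a few imports are in scope in the `writer/mod.rs` test module. A rough, unverified prelude is below; the `arrow_test_data` and `ReaderBuilder` paths in particular are my guess, so double-check them against the existing reader tests:

   ```rust
   // Assumed test-module prelude for the sketches above; paths are not verified
   // against what arrow-avro/src/writer/mod.rs already imports.
   use std::fs::File;
   use std::io::{BufReader, Cursor};
   use std::path::PathBuf;

   use arrow::util::test_util::arrow_test_data; // resolves fixtures under ARROW_TEST_DATA (assumed path)
   use arrow_schema::{ArrowError, DataType};
   use tempfile::NamedTempFile; // dev-dependency used by the decimal round-trip test

   use crate::reader::ReaderBuilder; // Avro OCF reader used to read the written bytes back (assumed path)
   use super::AvroWriter; // the OCF writer under test, assuming it lives in this module
   ```

   The tests also rely on the `arrow` crate being available as a dev-dependency (for `arrow::compute::concat_batches`) and on the `test/data/*.avro` fixtures shipping with the crate, same as the existing reader tests.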


