alamb commented on code in PR #8298:
URL: https://github.com/apache/arrow-rs/pull/8298#discussion_r2337684738
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -763,4 +1179,100 @@ mod tests {
let got = encode_all(&arr, &FieldPlan::Scalar, None);
assert_bytes_eq(&got, &expected);
}
+
+ #[test]
Review Comment:
Could we also please add some "end to end" round trip tests here?
Specifically
1. Create an Arrow array of the relevant type
2. Write the array an avro file
3. Read the avro file back
4. Assert that the array that comes back is the same as was written
maybe @jecsand838 knows of existing tests which we can pattern new tests on
##########
arrow-avro/src/writer/encoder.rs:
##########
@@ -763,4 +1179,100 @@ mod tests {
let got = encode_all(&arr, &FieldPlan::Scalar, None);
assert_bytes_eq(&got, &expected);
}
+
+ #[test]
+ fn list_encoder_int32() {
+ // Build ListArray [[1,2], [], [3]]
+ let values = Int32Array::from(vec![1, 2, 3]);
+ let offsets = vec![0, 2, 2, 3];
+ let list = ListArray::new(
+ Field::new("item", DataType::Int32, true).into(),
+ arrow_buffer::OffsetBuffer::new(offsets.into()),
+ Arc::new(values) as ArrayRef,
+ None,
+ );
+ // Avro array encoding per row
+ let mut expected = Vec::new();
+ // row 0: block len 2, items 1,2 then 0
+ expected.extend(avro_long_bytes(2));
+ expected.extend(avro_long_bytes(1));
+ expected.extend(avro_long_bytes(2));
+ expected.extend(avro_long_bytes(0));
+ // row 1: empty
+ expected.extend(avro_long_bytes(0));
+ // row 2: one item 3
+ expected.extend(avro_long_bytes(1));
+ expected.extend(avro_long_bytes(3));
+ expected.extend(avro_long_bytes(0));
+
+ let plan = FieldPlan::List {
+ items_nullability: None,
+ item_plan: Box::new(FieldPlan::Scalar),
+ };
+ let got = encode_all(&list, &plan, None);
+ assert_bytes_eq(&got, &expected);
+ }
+
+ #[test]
+ fn struct_encoder_two_fields() {
+ // Struct { a: Int32, b: Utf8 }
+ let a = Int32Array::from(vec![1, 2]);
+ let b = StringArray::from(vec!["x", "y"]);
+ let fields = Fields::from(vec![
+ Field::new("a", DataType::Int32, true),
+ Field::new("b", DataType::Utf8, true),
+ ]);
+ let struct_arr = StructArray::new(
+ fields.clone(),
+ vec![Arc::new(a) as ArrayRef, Arc::new(b) as ArrayRef],
+ None,
+ );
+ let plan = FieldPlan::Struct {
+ encoders: vec![
+ FieldBinding {
+ arrow_index: 0,
+ nullability: None,
+ plan: FieldPlan::Scalar,
+ },
+ FieldBinding {
+ arrow_index: 1,
+ nullability: None,
+ plan: FieldPlan::Scalar,
+ },
+ ],
+ };
+ let got = encode_all(&struct_arr, &plan, None);
+ // Expected: rows concatenated: a then b
+ let mut expected = Vec::new();
+ expected.extend(avro_long_bytes(1)); // a=1
+ expected.extend(avro_len_prefixed_bytes(b"x")); // b="x"
+ expected.extend(avro_long_bytes(2)); // a=2
+ expected.extend(avro_len_prefixed_bytes(b"y")); // b="y"
+ assert_bytes_eq(&got, &expected);
+ }
+
+ #[test]
Review Comment:
Do we need coverage for the other types? For example, FixedSizeBinary, UUID,
etc?
Here is the list from the PR description
> Fixed
> UUID
> IntervalMonthDayNano
> IntervalYearMonth
> IntervalDayTime
> Decimal32
> Decimal64
> Decimal128
> Decimal256
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]