alamb commented on code in PR #6399:
URL: https://github.com/apache/arrow-rs/pull/6399#discussion_r1761786847
##########
arrow-schema/src/field.rs:
##########
@@ -375,6 +375,7 @@ impl Field {
| DataType::FixedSizeList(field, _)
| DataType::Map(field, _) => field.fields(),
DataType::Dictionary(_, value_field) =>
Field::_fields(value_field.as_ref()),
+ DataType::RunEndEncoded(_, field) => field.fields(),
Review Comment:
👍
##########
arrow-ipc/src/reader/stream.rs:
##########
@@ -315,4 +315,64 @@ mod tests {
let err = decoder.finish().unwrap_err().to_string();
assert_eq!(err, "Ipc error: Unexpected End of Stream");
}
+
+ #[test]
+ fn test_read_ree_dict_record_batches_from_buffer() {
+ let schema = Schema::new(vec![
+ Field::new(
+ "test1",
+ DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends".to_string(), DataType::Int32,
false)),
+ Arc::new(Field::new_dict(
+ "values".to_string(),
+ DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
+ true,
+ 0,
+ false,
+ )),
+ ),
+ true,
+ ),
+ ]);
+ let batch = RecordBatch::try_new(
+ schema.clone().into(),
+ vec![
+ Arc::new(
+ RunArray::try_new(
+ &Int32Array::from(vec![1, 2, 3]),
+ &vec![Some("a"), None, Some("a")]
+ .into_iter()
+ .collect::<DictionaryArray<Int32Type>>(),
+ )
+ .expect("Failed to create RunArray"),
+ ),
+ ],
+ )
+ .expect("Failed to create RecordBatch");
+
+ let mut buffer = vec![];
+ {
+ let mut writer = StreamWriter::try_new_with_options(
+ &mut buffer,
+ &schema,
+ IpcWriteOptions::default().with_preserve_dict_id(false),
+ )
+ .expect("Failed to create StreamWriter");
+ writer.write(&batch).expect("Failed to write RecordBatch");
+ writer.finish().expect("Failed to finish StreamWriter");
+ }
+
+ let mut decoder = StreamDecoder::new();
+ let buf = &mut Buffer::from(buffer.as_slice());
+ while let Some(_) = decoder
Review Comment:
I think we should update the test to verify that the batch that comes back
matches the batch that was written -- i.e., that the data round-trips.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org