This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch cherry_pick_8308615d in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit c2fc8d4938dd3e032b23f1766db6a8743d6350c2 Author: Yuan Zhou <[email protected]> AuthorDate: Sat Aug 21 18:33:11 2021 +0800 Support binary data type in `build_struct_array`. (#702) * Support binary data type in `build_struct_array`. * Modify test case. * cargo fmt Co-authored-by: Andrew Lamb <[email protected]> --- arrow/src/array/array.rs | 22 ++++++++++++++++++++++ arrow/src/json/reader.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index d715bc4..7a1796b 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -658,6 +658,28 @@ mod tests { } #[test] + fn test_null_map() { + let data_type = DataType::Map( + Box::new(Field::new( + "entry", + DataType::Struct(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Int32, true), + ]), + false, + )), + false, + ); + let array = new_null_array(&data_type, 9); + let a = array.as_any().downcast_ref::<MapArray>().unwrap(); + assert_eq!(a.len(), 9); + assert_eq!(a.value_offsets()[9], 0i32); + for i in 0..9 { + assert!(a.is_null(i)); + } + } + + #[test] fn test_null_dictionary() { let values = vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>; diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs index 0b220eb..2e716eb 100644 --- a/arrow/src/json/reader.rs +++ b/arrow/src/json/reader.rs @@ -1224,6 +1224,14 @@ impl Decoder { }) .collect::<StringArray>(), ) as ArrayRef), + DataType::Binary => Ok(Arc::new( + rows.iter() + .map(|row| { + let maybe_value = row.get(field.name()); + maybe_value.and_then(|value| value.as_str()) + }) + .collect::<BinaryArray>(), + ) as ArrayRef), DataType::List(ref list_field) => { match list_field.data_type() { DataType::Dictionary(ref key_ty, _) => { @@ -2964,6 +2972,38 @@ mod tests { } #[test] + fn test_json_read_binary_structs() { + let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]); + 
let decoder = Decoder::new(Arc::new(schema), 1024, None); + let batch = decoder + .next_batch( + &mut vec![ + Ok(serde_json::json!({ + "c1": "₁₂₃", + })), + Ok(serde_json::json!({ + "c1": "foo", + })), + ] + .into_iter(), + ) + .unwrap() + .unwrap(); + let data = batch.columns().iter().collect::<Vec<_>>(); + + let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]); + let binary_values = BinaryArray::from(vec!["₁₂₃".as_bytes(), "foo".as_bytes()]); + let expected_batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(binary_values)]) + .unwrap(); + let expected_data = expected_batch.columns().iter().collect::<Vec<_>>(); + + assert_eq!(data, expected_data); + assert_eq!(batch.num_columns(), 1); + assert_eq!(batch.num_rows(), 2); + } + + #[test] fn test_json_iterator() { let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(5); let reader: Reader<File> = builder
