This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 884c6a6 feat(json): support for map arrays in json writer (#1149)
884c6a6 is described below
commit 884c6a633f5f7e9cb2609ba1e8ac05652089d768
Author: Helgi Kristvin Sigurbjarnarson <[email protected]>
AuthorDate: Tue Jan 11 11:19:38 2022 -0800
feat(json): support for map arrays in json writer (#1149)
---
arrow/src/array/cast.rs | 1 +
arrow/src/array/mod.rs | 3 +-
arrow/src/json/writer.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/arrow/src/array/cast.rs b/arrow/src/array/cast.rs
index c96e49e..a708b26 100644
--- a/arrow/src/array/cast.rs
+++ b/arrow/src/array/cast.rs
@@ -95,6 +95,7 @@ array_downcast_fn!(as_boolean_array, BooleanArray);
array_downcast_fn!(as_null_array, NullArray);
array_downcast_fn!(as_struct_array, StructArray);
array_downcast_fn!(as_union_array, UnionArray);
+array_downcast_fn!(as_map_array, MapArray);
#[cfg(test)]
mod tests {
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index dbeaab7..156c0cf 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -515,7 +515,8 @@ pub use self::ord::{build_compare, DynComparator};
pub use self::cast::{
as_boolean_array, as_dictionary_array, as_generic_binary_array,
as_generic_list_array, as_large_list_array, as_largestring_array, as_list_array,
- as_null_array, as_primitive_array, as_string_array, as_struct_array, as_union_array,
+ as_map_array, as_null_array, as_primitive_array, as_string_array, as_struct_array,
+ as_union_array,
};
// ------------------------------ C Data Interface ---------------------------
diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 4279ab7..787ed15 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -486,6 +486,41 @@ fn set_column_for_json_rows(
.expect("cannot cast dictionary to underlying values");
set_column_for_json_rows(rows, row_count, &hydrated, col_name)
}
+ DataType::Map(_, _) => {
+ let maparr = as_map_array(array);
+
+ let keys = maparr.keys();
+ let values = maparr.values();
+
+ // Keys have to be strings to convert to json.
+ if !matches!(keys.data_type(), DataType::Utf8) {
+ panic!("Unsupported datatype: {:#?}", array.data_type());
+ }
+
+ let keys = as_string_array(&keys);
+ let values = array_to_json_array(&values);
+
+ let mut kv = keys.iter().zip(values.into_iter());
+
+ for (i, row) in rows.iter_mut().take(row_count).enumerate() {
+ if maparr.is_null(i) {
+ row.insert(col_name.to_string(), serde_json::Value::Null);
+ continue;
+ }
+
+ let len = maparr.value_length(i) as usize;
+ let mut obj = serde_json::Map::new();
+
+ for (_, (k, v)) in (0..len).zip(&mut kv) {
+ obj.insert(
+ k.expect("keys in a map should be non-null").to_string(),
+ v,
+ );
+ }
+
+ row.insert(col_name.to_string(), serde_json::Value::Object(obj));
+ }
+ }
_ => {
panic!("Unsupported datatype: {:#?}", array.data_type());
}
@@ -1315,4 +1350,63 @@ mod tests {
"#
);
}
+ // Checks that LineDelimitedWriter renders a six-row MapArray (Utf8 keys, Int64 values) as one JSON object per line, covering a null row and empty maps.
+ #[test]
+ fn json_writer_map() {
+ let keys_array =
+ super::StringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+ let values_array = super::Int64Array::from(vec![10, 20, 30, 40, 50]);
+ // Pair the flat key and value arrays into the struct array of map entries; the trailing `false` marks each field non-nullable.
+ let keys = Field::new("keys", DataType::Utf8, false);
+ let values = Field::new("values", DataType::Int64, false);
+ let entry_struct = StructArray::from(vec![
+ (keys, Arc::new(keys_array) as ArrayRef),
+ (values, Arc::new(values_array) as ArrayRef),
+ ]);
+ // The map type wraps the entry struct's data type in a field named "entries".
+ let map_data_type = DataType::Map(
+ Box::new(Field::new(
+ "entries",
+ entry_struct.data_type().clone(),
+ true,
+ )),
+ false,
+ );
+ // Logical content of the six map rows built from the offsets and validity bitmap below:
+ // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}, {"quux":
50}, {}]
+ let entry_offsets = Buffer::from(&[0, 1, 1, 1, 4, 5,
5].to_byte_slice()); // seven offsets bound six rows with entry counts 1, 0, 0, 3, 1, 0
+ let valid_buffer = Buffer::from([0b00111101]); // only bit 1 is clear, so row 1 is the null row
+
+ let map_data = ArrayData::builder(map_data_type.clone())
+ .len(6)
+ .null_bit_buffer(valid_buffer)
+ .add_buffer(entry_offsets)
+ .add_child_data(entry_struct.data().clone())
+ .build()
+ .unwrap();
+
+ let map = MapArray::from(map_data);
+ // Wrap the map array in a single-column record batch whose column is named "map".
+ let map_field = Field::new("map", map_data_type, false);
+ let schema = Arc::new(Schema::new(vec![map_field]));
+
+ let batch = RecordBatch::try_new(schema, vec![Arc::new(map)]).unwrap();
+ // The inner scope ends the writer's mutable borrow of `buf` before `buf` is consumed below.
+ let mut buf = Vec::new();
+ {
+ let mut writer = LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[batch]).unwrap();
+ }
+ // One JSON line per row, in row order; the newline after the last row is part of the expected output.
+ assert_eq!(
+ String::from_utf8(buf).unwrap(),
+ r#"{"map":{"foo":10}}
+{"map":null}
+{"map":{}}
+{"map":{"bar":20,"baz":30,"qux":40}}
+{"map":{"quux":50}}
+{"map":{}}
+"#
+ );
+ }
}