This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 884c6a6  feat(json): support for map arrays in json writer (#1149)
884c6a6 is described below

commit 884c6a633f5f7e9cb2609ba1e8ac05652089d768
Author: Helgi Kristvin Sigurbjarnarson <[email protected]>
AuthorDate: Tue Jan 11 11:19:38 2022 -0800

    feat(json): support for map arrays in json writer (#1149)
---
 arrow/src/array/cast.rs  |  1 +
 arrow/src/array/mod.rs   |  3 +-
 arrow/src/json/writer.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/arrow/src/array/cast.rs b/arrow/src/array/cast.rs
index c96e49e..a708b26 100644
--- a/arrow/src/array/cast.rs
+++ b/arrow/src/array/cast.rs
@@ -95,6 +95,7 @@ array_downcast_fn!(as_boolean_array, BooleanArray);
 array_downcast_fn!(as_null_array, NullArray);
 array_downcast_fn!(as_struct_array, StructArray);
 array_downcast_fn!(as_union_array, UnionArray);
+array_downcast_fn!(as_map_array, MapArray);
 
 #[cfg(test)]
 mod tests {
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index dbeaab7..156c0cf 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -515,7 +515,8 @@ pub use self::ord::{build_compare, DynComparator};
 pub use self::cast::{
     as_boolean_array, as_dictionary_array, as_generic_binary_array,
     as_generic_list_array, as_large_list_array, as_largestring_array, as_list_array,
-    as_null_array, as_primitive_array, as_string_array, as_struct_array, as_union_array,
+    as_map_array, as_null_array, as_primitive_array, as_string_array, as_struct_array,
+    as_union_array,
 };
 
 // ------------------------------ C Data Interface ---------------------------
diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 4279ab7..787ed15 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -486,6 +486,41 @@ fn set_column_for_json_rows(
                 .expect("cannot cast dictionary to underlying values");
             set_column_for_json_rows(rows, row_count, &hydrated, col_name)
         }
+        DataType::Map(_, _) => {
+            let maparr = as_map_array(array);
+
+            let keys = maparr.keys();
+            let values = maparr.values();
+
+            // Keys have to be strings to convert to json.
+            if !matches!(keys.data_type(), DataType::Utf8) {
+                panic!("Unsupported datatype: {:#?}", array.data_type());
+            }
+
+            let keys = as_string_array(&keys);
+            let values = array_to_json_array(&values);
+
+            let mut kv = keys.iter().zip(values.into_iter());
+
+            for (i, row) in rows.iter_mut().take(row_count).enumerate() {
+                if maparr.is_null(i) {
+                    row.insert(col_name.to_string(), serde_json::Value::Null);
+                    continue;
+                }
+
+                let len = maparr.value_length(i) as usize;
+                let mut obj = serde_json::Map::new();
+
+                for (_, (k, v)) in (0..len).zip(&mut kv) {
+                    obj.insert(
+                        k.expect("keys in a map should be non-null").to_string(),
+                        v,
+                    );
+                }
+
+                row.insert(col_name.to_string(), serde_json::Value::Object(obj));
+            }
+        }
         _ => {
             panic!("Unsupported datatype: {:#?}", array.data_type());
         }
@@ -1315,4 +1350,63 @@ mod tests {
 "#
         );
     }
+
+    #[test]
+    fn json_writer_map() {
+        let keys_array =
+            super::StringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+        let values_array = super::Int64Array::from(vec![10, 20, 30, 40, 50]);
+
+        let keys = Field::new("keys", DataType::Utf8, false);
+        let values = Field::new("values", DataType::Int64, false);
+        let entry_struct = StructArray::from(vec![
+            (keys, Arc::new(keys_array) as ArrayRef),
+            (values, Arc::new(values_array) as ArrayRef),
+        ]);
+
+        let map_data_type = DataType::Map(
+            Box::new(Field::new(
+                "entries",
+                entry_struct.data_type().clone(),
+                true,
+            )),
+            false,
+        );
+
+        // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}, {"quux": 50}, {}]
+        let entry_offsets = Buffer::from(&[0, 1, 1, 1, 4, 5, 5].to_byte_slice());
+        let valid_buffer = Buffer::from([0b00111101]);
+
+        let map_data = ArrayData::builder(map_data_type.clone())
+            .len(6)
+            .null_bit_buffer(valid_buffer)
+            .add_buffer(entry_offsets)
+            .add_child_data(entry_struct.data().clone())
+            .build()
+            .unwrap();
+
+        let map = MapArray::from(map_data);
+
+        let map_field = Field::new("map", map_data_type, false);
+        let schema = Arc::new(Schema::new(vec![map_field]));
+
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(map)]).unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+        }
+
+        assert_eq!(
+            String::from_utf8(buf).unwrap(),
+            r#"{"map":{"foo":10}}
+{"map":null}
+{"map":{}}
+{"map":{"bar":20,"baz":30,"qux":40}}
+{"map":{"quux":50}}
+{"map":{}}
+"#
+        );
+    }
 }

Reply via email to