This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new fbbb61d942 Allow writing null valued keys in JSON (#5065)
fbbb61d942 is described below

commit fbbb61d94282165f9bb9f73fb4d00a3af16d4aee
Author: Jeffrey <[email protected]>
AuthorDate: Tue Nov 21 08:18:21 2023 +1100

    Allow writing null valued keys in JSON (#5065)
    
    * Allow writing null valued keys in JSON
    
    * Trigger
    
    * Refactor keep nulls to be runtime config
    
    * Rename option
    
    * Rename option
---
 arrow-array/src/numeric.rs                  |   1 -
 arrow-json/src/lib.rs                       |   2 +-
 arrow-json/src/writer.rs                    | 515 ++++++++++++++++++++++++----
 arrow-json/test/data/nested_with_nulls.json |   4 +
 arrow/src/ffi.rs                            |   2 -
 arrow/tests/array_cast.rs                   |   1 -
 object_store/src/gcp/builder.rs             |   2 +-
 7 files changed, 461 insertions(+), 66 deletions(-)

diff --git a/arrow-array/src/numeric.rs b/arrow-array/src/numeric.rs
index ad7b3eca1d..b5e474ba69 100644
--- a/arrow-array/src/numeric.rs
+++ b/arrow-array/src/numeric.rs
@@ -618,7 +618,6 @@ mod tests {
         let mask = 0b01010101_01010101_10101010_10101010;
         let actual = UInt16Type::mask_from_u64(mask);
         let expected = expected_mask!(i16, mask);
-        dbg!(&expected);
         let expected = 
m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
 
         assert_eq!(expected, actual);
diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs
index e69eaaba3e..e39882e526 100644
--- a/arrow-json/src/lib.rs
+++ b/arrow-json/src/lib.rs
@@ -82,7 +82,7 @@ pub type RawReader<R> = Reader<R>;
 pub type RawReaderBuilder = ReaderBuilder;
 
 pub use self::reader::{Reader, ReaderBuilder};
-pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer};
+pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer, 
WriterBuilder};
 use half::f16;
 use serde_json::{Number, Value};
 
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 5ecfc93236..4f74817ca1 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -92,6 +92,10 @@
 //! let buf = writer.into_inner();
 //! assert_eq!(r#"[{"a":1},{"a":2},{"a":3}]"#, String::from_utf8(buf).unwrap())
 //! ```
+//!
+//! [`LineDelimitedWriter`] and [`ArrayWriter`] will omit writing keys with 
null values.
+//! In order to explicitly write null values for keys, configure a custom 
[`Writer`] by
+//! using a [`WriterBuilder`] to construct a [`Writer`].
 
 use std::iter;
 use std::{fmt::Debug, io::Write};
@@ -124,6 +128,7 @@ where
 
 fn struct_array_to_jsonmap_array(
     array: &StructArray,
+    explicit_nulls: bool,
 ) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
     let inner_col_names = array.column_names();
 
@@ -132,13 +137,26 @@ fn struct_array_to_jsonmap_array(
         .collect::<Vec<JsonMap<String, Value>>>();
 
     for (j, struct_col) in array.columns().iter().enumerate() {
-        set_column_for_json_rows(&mut inner_objs, struct_col, 
inner_col_names[j])?
+        set_column_for_json_rows(
+            &mut inner_objs,
+            struct_col,
+            inner_col_names[j],
+            explicit_nulls,
+        )?
     }
     Ok(inner_objs)
 }
 
 /// Converts an arrow [`Array`] into a `Vec` of Serde JSON 
[`serde_json::Value`]'s
 pub fn array_to_json_array(array: &dyn Array) -> Result<Vec<Value>, 
ArrowError> {
+    // For backwards compatibility, default to skip nulls
+    array_to_json_array_internal(array, false)
+}
+
+fn array_to_json_array_internal(
+    array: &dyn Array,
+    explicit_nulls: bool,
+) -> Result<Vec<Value>, ArrowError> {
     match array.data_type() {
         DataType::Null => 
Ok(iter::repeat(Value::Null).take(array.len()).collect()),
         DataType::Boolean => Ok(array
@@ -180,32 +198,44 @@ pub fn array_to_json_array(array: &dyn Array) -> 
Result<Vec<Value>, ArrowError>
         DataType::List(_) => as_list_array(array)
             .iter()
             .map(|maybe_value| match maybe_value {
-                Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+                Some(v) => Ok(Value::Array(array_to_json_array_internal(
+                    &v,
+                    explicit_nulls,
+                )?)),
                 None => Ok(Value::Null),
             })
             .collect(),
         DataType::LargeList(_) => as_large_list_array(array)
             .iter()
             .map(|maybe_value| match maybe_value {
-                Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+                Some(v) => Ok(Value::Array(array_to_json_array_internal(
+                    &v,
+                    explicit_nulls,
+                )?)),
                 None => Ok(Value::Null),
             })
             .collect(),
         DataType::FixedSizeList(_, _) => as_fixed_size_list_array(array)
             .iter()
             .map(|maybe_value| match maybe_value {
-                Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+                Some(v) => Ok(Value::Array(array_to_json_array_internal(
+                    &v,
+                    explicit_nulls,
+                )?)),
                 None => Ok(Value::Null),
             })
             .collect(),
         DataType::Struct(_) => {
-            let jsonmaps = struct_array_to_jsonmap_array(array.as_struct())?;
+            let jsonmaps = struct_array_to_jsonmap_array(array.as_struct(), 
explicit_nulls)?;
             Ok(jsonmaps.into_iter().map(Value::Object).collect())
         }
         DataType::Map(_, _) => as_map_array(array)
             .iter()
             .map(|maybe_value| match maybe_value {
-                Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+                Some(v) => Ok(Value::Array(array_to_json_array_internal(
+                    &v,
+                    explicit_nulls,
+                )?)),
                 None => Ok(Value::Null),
             })
             .collect(),
@@ -216,14 +246,16 @@ pub fn array_to_json_array(array: &dyn Array) -> 
Result<Vec<Value>, ArrowError>
 }
 
 macro_rules! set_column_by_array_type {
-    ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident) => {
+    ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident, 
$explicit_nulls:ident) => {
         let arr = $cast_fn($array);
         $rows
             .iter_mut()
             .zip(arr.iter())
             .for_each(|(row, maybe_value)| {
-                if let Some(v) = maybe_value {
-                    row.insert($col_name.to_string(), v.into());
+                if let Some(j) = maybe_value.map(Into::into) {
+                    row.insert($col_name.to_string(), j);
+                } else if $explicit_nulls {
+                    row.insert($col_name.to_string(), Value::Null);
                 }
             });
     };
@@ -233,6 +265,7 @@ fn set_column_by_primitive_type<T>(
     rows: &mut [JsonMap<String, Value>],
     array: &ArrayRef,
     col_name: &str,
+    explicit_nulls: bool,
 ) where
     T: ArrowPrimitiveType,
     T::Native: JsonSerializable,
@@ -242,9 +275,10 @@ fn set_column_by_primitive_type<T>(
     rows.iter_mut()
         .zip(primitive_arr.iter())
         .for_each(|(row, maybe_value)| {
-            // when value is null, we simply skip setting the key
             if let Some(j) = maybe_value.and_then(|v| v.into_json_value()) {
                 row.insert(col_name.to_string(), j);
+            } else if explicit_nulls {
+                row.insert(col_name.to_string(), Value::Null);
             }
         });
 }
@@ -253,52 +287,57 @@ fn set_column_for_json_rows(
     rows: &mut [JsonMap<String, Value>],
     array: &ArrayRef,
     col_name: &str,
+    explicit_nulls: bool,
 ) -> Result<(), ArrowError> {
     match array.data_type() {
         DataType::Int8 => {
-            set_column_by_primitive_type::<Int8Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Int8Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Int16 => {
-            set_column_by_primitive_type::<Int16Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Int16Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Int32 => {
-            set_column_by_primitive_type::<Int32Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Int32Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Int64 => {
-            set_column_by_primitive_type::<Int64Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Int64Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::UInt8 => {
-            set_column_by_primitive_type::<UInt8Type>(rows, array, col_name);
+            set_column_by_primitive_type::<UInt8Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::UInt16 => {
-            set_column_by_primitive_type::<UInt16Type>(rows, array, col_name);
+            set_column_by_primitive_type::<UInt16Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::UInt32 => {
-            set_column_by_primitive_type::<UInt32Type>(rows, array, col_name);
+            set_column_by_primitive_type::<UInt32Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::UInt64 => {
-            set_column_by_primitive_type::<UInt64Type>(rows, array, col_name);
+            set_column_by_primitive_type::<UInt64Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Float16 => {
-            set_column_by_primitive_type::<Float16Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Float16Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Float32 => {
-            set_column_by_primitive_type::<Float32Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Float32Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Float64 => {
-            set_column_by_primitive_type::<Float64Type>(rows, array, col_name);
+            set_column_by_primitive_type::<Float64Type>(rows, array, col_name, 
explicit_nulls);
         }
         DataType::Null => {
-            // when value is null, we simply skip setting the key
+            if explicit_nulls {
+                rows.iter_mut().for_each(|row| {
+                    row.insert(col_name.to_string(), Value::Null);
+                });
+            }
         }
         DataType::Boolean => {
-            set_column_by_array_type!(as_boolean_array, col_name, rows, array);
+            set_column_by_array_type!(as_boolean_array, col_name, rows, array, 
explicit_nulls);
         }
         DataType::Utf8 => {
-            set_column_by_array_type!(as_string_array, col_name, rows, array);
+            set_column_by_array_type!(as_string_array, col_name, rows, array, 
explicit_nulls);
         }
         DataType::LargeUtf8 => {
-            set_column_by_array_type!(as_largestring_array, col_name, rows, 
array);
+            set_column_by_array_type!(as_largestring_array, col_name, rows, 
array, explicit_nulls);
         }
         DataType::Date32
         | DataType::Date64
@@ -310,16 +349,19 @@ fn set_column_for_json_rows(
             let formatter = ArrayFormatter::try_new(array.as_ref(), &options)?;
             let nulls = array.nulls();
             rows.iter_mut().enumerate().for_each(|(idx, row)| {
-                if nulls.map(|x| x.is_valid(idx)).unwrap_or(true) {
-                    row.insert(
-                        col_name.to_string(),
-                        formatter.value(idx).to_string().into(),
-                    );
-                }
+                let maybe_value = nulls
+                    .map(|x| x.is_valid(idx))
+                    .unwrap_or(true)
+                    .then(|| formatter.value(idx).to_string().into());
+                if let Some(j) = maybe_value {
+                    row.insert(col_name.to_string(), j);
+                } else if explicit_nulls {
+                    row.insert(col_name.to_string(), Value::Null);
+                };
             });
         }
         DataType::Struct(_) => {
-            let inner_objs = struct_array_to_jsonmap_array(array.as_struct())?;
+            let inner_objs = struct_array_to_jsonmap_array(array.as_struct(), 
explicit_nulls)?;
             rows.iter_mut().zip(inner_objs).for_each(|(row, obj)| {
                 row.insert(col_name.to_string(), Value::Object(obj));
             });
@@ -328,8 +370,13 @@ fn set_column_for_json_rows(
             let listarr = as_list_array(array);
             rows.iter_mut().zip(listarr.iter()).try_for_each(
                 |(row, maybe_value)| -> Result<(), ArrowError> {
-                    if let Some(v) = maybe_value {
-                        row.insert(col_name.to_string(), 
Value::Array(array_to_json_array(&v)?));
+                    let maybe_value = maybe_value
+                        .map(|v| array_to_json_array_internal(&v, 
explicit_nulls).map(Value::Array))
+                        .transpose()?;
+                    if let Some(j) = maybe_value {
+                        row.insert(col_name.to_string(), j);
+                    } else if explicit_nulls {
+                        row.insert(col_name.to_string(), Value::Null);
                     }
                     Ok(())
                 },
@@ -339,9 +386,13 @@ fn set_column_for_json_rows(
             let listarr = as_large_list_array(array);
             rows.iter_mut().zip(listarr.iter()).try_for_each(
                 |(row, maybe_value)| -> Result<(), ArrowError> {
-                    if let Some(v) = maybe_value {
-                        let val = array_to_json_array(&v)?;
-                        row.insert(col_name.to_string(), Value::Array(val));
+                    let maybe_value = maybe_value
+                        .map(|v| array_to_json_array_internal(&v, 
explicit_nulls).map(Value::Array))
+                        .transpose()?;
+                    if let Some(j) = maybe_value {
+                        row.insert(col_name.to_string(), j);
+                    } else if explicit_nulls {
+                        row.insert(col_name.to_string(), Value::Null);
                     }
                     Ok(())
                 },
@@ -350,7 +401,7 @@ fn set_column_for_json_rows(
         DataType::Dictionary(_, value_type) => {
             let hydrated = arrow_cast::cast::cast(&array, value_type)
                 .expect("cannot cast dictionary to underlying values");
-            set_column_for_json_rows(rows, &hydrated, col_name)?;
+            set_column_for_json_rows(rows, &hydrated, col_name, 
explicit_nulls)?;
         }
         DataType::Map(_, _) => {
             let maparr = as_map_array(array);
@@ -367,7 +418,7 @@ fn set_column_for_json_rows(
             }
 
             let keys = keys.as_string::<i32>();
-            let values = array_to_json_array(values)?;
+            let values = array_to_json_array_internal(values, explicit_nulls)?;
 
             let mut kv = keys.iter().zip(values);
 
@@ -401,6 +452,14 @@ fn set_column_for_json_rows(
 /// [`JsonMap`]s (objects)
 pub fn record_batches_to_json_rows(
     batches: &[&RecordBatch],
+) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
+    // For backwards compatibility, default to skip nulls
+    record_batches_to_json_rows_internal(batches, false)
+}
+
+fn record_batches_to_json_rows_internal(
+    batches: &[&RecordBatch],
+    explicit_nulls: bool,
 ) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
     let mut rows: Vec<JsonMap<String, Value>> = iter::repeat(JsonMap::new())
         .take(batches.iter().map(|b| b.num_rows()).sum())
@@ -414,7 +473,7 @@ pub fn record_batches_to_json_rows(
             let row_slice = &mut rows[base..base + batch.num_rows()];
             for (j, col) in batch.columns().iter().enumerate() {
                 let col_name = schema.field(j).name();
-                set_column_for_json_rows(row_slice, col, col_name)?
+                set_column_for_json_rows(row_slice, col, col_name, 
explicit_nulls)?
             }
             base += row_count;
         }
@@ -450,7 +509,9 @@ pub trait JsonFormat: Debug + Default {
     }
 }
 
-/// Produces JSON output with one record per line. For example
+/// Produces JSON output with one record per line.
+///
+/// For example:
 ///
 /// ```json
 /// {"foo":1}
@@ -467,7 +528,9 @@ impl JsonFormat for LineDelimited {
     }
 }
 
-/// Produces JSON output as a single JSON array. For example
+/// Produces JSON output as a single JSON array.
+///
+/// For example:
 ///
 /// ```json
 /// [{"foo":1},{"bar":1}]
@@ -494,16 +557,101 @@ impl JsonFormat for JsonArray {
     }
 }
 
-/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON 
objects
+/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON 
objects.
 pub type LineDelimitedWriter<W> = Writer<W, LineDelimited>;
 
-/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays
+/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays.
 pub type ArrayWriter<W> = Writer<W, JsonArray>;
 
+/// JSON writer builder.
+#[derive(Debug, Clone, Default)]
+pub struct WriterBuilder {
+    /// Controls whether null values should be written explicitly for keys
+    /// in objects, or whether the key should be omitted entirely.
+    explicit_nulls: bool,
+}
+
+impl WriterBuilder {
+    /// Create a new builder for configuring JSON writing options.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_json::{Writer, WriterBuilder};
+    /// # use arrow_json::writer::LineDelimited;
+    /// # use std::fs::File;
+    ///
+    /// fn example() -> Writer<File, LineDelimited> {
+    ///     let file = File::create("target/out.json").unwrap();
+    ///
+    ///     // create a builder that keeps keys with null values
+    ///     let builder = WriterBuilder::new().with_explicit_nulls(true);
+    ///     let writer = builder.build::<_, LineDelimited>(file);
+    ///
+    ///     writer
+    /// }
+    /// ```
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns `true` if this builder is configured to keep keys with null 
values.
+    pub fn explicit_nulls(&self) -> bool {
+        self.explicit_nulls
+    }
+
+    /// Set whether to keep keys with null values, or to omit writing them.
+    ///
+    /// For example, with [`LineDelimited`] format:
+    ///
+    /// Skip nulls (set to `false`):
+    ///
+    /// ```json
+    /// {"foo":1}
+    /// {"foo":1,"bar":2}
+    /// {}
+    /// ```
+    ///
+    /// Keep nulls (set to `true`):
+    ///
+    /// ```json
+    /// {"foo":1,"bar":null}
+    /// {"foo":1,"bar":2}
+    /// {"foo":null,"bar":null}
+    /// ```
+    ///
+    /// Default is to skip nulls (set to `false`).
+    pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self {
+        self.explicit_nulls = explicit_nulls;
+        self
+    }
+
+    /// Create a new `Writer` with specified `JsonFormat` and builder options.
+    pub fn build<W, F>(self, writer: W) -> Writer<W, F>
+    where
+        W: Write,
+        F: JsonFormat,
+    {
+        Writer {
+            writer,
+            started: false,
+            finished: false,
+            format: F::default(),
+            explicit_nulls: self.explicit_nulls,
+        }
+    }
+}
+
 /// A JSON writer which serializes [`RecordBatch`]es to a stream of
-/// `u8` encoded JSON objects. See the module level documentation for
-/// detailed usage and examples. The specific format of the stream is
-/// controlled by the [`JsonFormat`] type parameter.
+/// `u8` encoded JSON objects.
+///
+/// See the module level documentation for detailed usage and examples.
+/// The specific format of the stream is controlled by the [`JsonFormat`]
+/// type parameter.
+///
+/// By default the writer will skip writing keys with null values for
+/// backward compatibility. See [`WriterBuilder`] on how to customize
+/// this behaviour when creating a new writer.
 #[derive(Debug)]
 pub struct Writer<W, F>
 where
@@ -521,6 +669,9 @@ where
 
     /// Determines how the byte stream is formatted
     format: F,
+
+    /// Whether keys with null values should be written or skipped
+    explicit_nulls: bool,
 }
 
 impl<W, F> Writer<W, F>
@@ -535,6 +686,7 @@ where
             started: false,
             finished: false,
             format: F::default(),
+            explicit_nulls: false,
         }
     }
 
@@ -556,7 +708,7 @@ where
 
     /// Convert the `RecordBatch` into JSON rows, and write them to the output
     pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
-        for row in record_batches_to_json_rows(&[batch])? {
+        for row in record_batches_to_json_rows_internal(&[batch], 
self.explicit_nulls)? {
             self.write_row(&Value::Object(row))?;
         }
         Ok(())
@@ -564,7 +716,7 @@ where
 
     /// Convert the [`RecordBatch`] into JSON rows, and write them to the 
output
     pub fn write_batches(&mut self, batches: &[&RecordBatch]) -> Result<(), 
ArrowError> {
-        for row in record_batches_to_json_rows(batches)? {
+        for row in record_batches_to_json_rows_internal(batches, 
self.explicit_nulls)? {
             self.write_row(&Value::Object(row))?;
         }
         Ok(())
@@ -609,7 +761,7 @@ mod tests {
 
     use serde_json::json;
 
-    use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
+    use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder, 
StringBuilder};
     use arrow_buffer::{Buffer, ToByteSlice};
     use arrow_data::ArrayData;
 
@@ -1203,7 +1355,7 @@ mod tests {
         );
     }
 
-    fn test_write_for_file(test_file: &str) {
+    fn test_write_for_file(test_file: &str, remove_nulls: bool) {
         let file = File::open(test_file).unwrap();
         let mut reader = BufReader::new(file);
         let (schema, _) = infer_json_schema(&mut reader, None).unwrap();
@@ -1215,18 +1367,27 @@ mod tests {
 
         let mut buf = Vec::new();
         {
-            let mut writer = LineDelimitedWriter::new(&mut buf);
-            writer.write_batches(&[&batch]).unwrap();
+            if remove_nulls {
+                let mut writer = LineDelimitedWriter::new(&mut buf);
+                writer.write_batches(&[&batch]).unwrap();
+            } else {
+                let mut writer = WriterBuilder::new()
+                    .with_explicit_nulls(true)
+                    .build::<_, LineDelimited>(&mut buf);
+                writer.write_batches(&[&batch]).unwrap();
+            }
         }
 
         let result = String::from_utf8(buf).unwrap();
         let expected = read_to_string(test_file).unwrap();
         for (r, e) in result.lines().zip(expected.lines()) {
             let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
-            // remove null value from object to make comparison consistent:
-            if let Value::Object(obj) = expected_json {
-                expected_json =
-                    Value::Object(obj.into_iter().filter(|(_, v)| *v != 
Value::Null).collect());
+            if remove_nulls {
+                // remove null value from object to make comparison consistent:
+                if let Value::Object(obj) = expected_json {
+                    expected_json =
+                        Value::Object(obj.into_iter().filter(|(_, v)| *v != 
Value::Null).collect());
+                }
             }
             assert_eq!(serde_json::from_str::<Value>(r).unwrap(), 
expected_json,);
         }
@@ -1234,17 +1395,22 @@ mod tests {
 
     #[test]
     fn write_basic_rows() {
-        test_write_for_file("test/data/basic.json");
+        test_write_for_file("test/data/basic.json", true);
     }
 
     #[test]
     fn write_arrays() {
-        test_write_for_file("test/data/arrays.json");
+        test_write_for_file("test/data/arrays.json", true);
     }
 
     #[test]
     fn write_basic_nulls() {
-        test_write_for_file("test/data/basic_nulls.json");
+        test_write_for_file("test/data/basic_nulls.json", true);
+    }
+
+    #[test]
+    fn write_nested_with_nulls() {
+        test_write_for_file("test/data/nested_with_nulls.json", false);
     }
 
     #[test]
@@ -1530,4 +1696,233 @@ mod tests {
 
         assert_eq!(array_to_json_array(&map_array).unwrap(), expected_json);
     }
+
+    #[test]
+    fn test_writer_explicit_nulls() -> Result<(), ArrowError> {
+        fn nested_list() -> (Arc<ListArray>, Arc<Field>) {
+            let array = Arc::new(ListArray::from_iter_primitive::<Int32Type, 
_, _>(vec![
+                Some(vec![None, None, None]),
+                Some(vec![Some(1), Some(2), Some(3)]),
+                None,
+                Some(vec![None, None, None]),
+            ]));
+            let field = Arc::new(Field::new("list", array.data_type().clone(), 
true));
+            // 
[{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}]
+            (array, field)
+        }
+
+        fn nested_dict() -> (Arc<DictionaryArray<Int32Type>>, Arc<Field>) {
+            let array = Arc::new(DictionaryArray::from_iter(vec![
+                Some("cupcakes"),
+                None,
+                Some("bear"),
+                Some("kuma"),
+            ]));
+            let field = Arc::new(Field::new("dict", array.data_type().clone(), 
true));
+            // 
[{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}]
+            (array, field)
+        }
+
+        fn nested_map() -> (Arc<MapArray>, Arc<Field>) {
+            let string_builder = StringBuilder::new();
+            let int_builder = Int64Builder::new();
+            let mut builder = MapBuilder::new(None, string_builder, 
int_builder);
+
+            // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}]
+            builder.keys().append_value("foo");
+            builder.values().append_value(10);
+            builder.append(true).unwrap();
+
+            builder.append(false).unwrap();
+
+            builder.append(true).unwrap();
+
+            builder.keys().append_value("bar");
+            builder.values().append_value(20);
+            builder.keys().append_value("baz");
+            builder.values().append_value(30);
+            builder.keys().append_value("qux");
+            builder.values().append_value(40);
+            builder.append(true).unwrap();
+
+            let array = Arc::new(builder.finish());
+            let field = Arc::new(Field::new("map", array.data_type().clone(), 
true));
+            (array, field)
+        }
+
+        fn root_list() -> (Arc<ListArray>, Field) {
+            let struct_array = StructArray::from(vec![
+                (
+                    Arc::new(Field::new("utf8", DataType::Utf8, true)),
+                    Arc::new(StringArray::from(vec![Some("a"), Some("b"), 
None, None])) as ArrayRef,
+                ),
+                (
+                    Arc::new(Field::new("int32", DataType::Int32, true)),
+                    Arc::new(Int32Array::from(vec![Some(1), None, Some(5), 
None])) as ArrayRef,
+                ),
+            ]);
+
+            let field = Field::new_list(
+                "list",
+                Field::new("struct", struct_array.data_type().clone(), true),
+                true,
+            );
+
+            // 
[{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{"int32":5,"utf8":null}]},{"list":null}]
+            let entry_offsets = Buffer::from(&[0, 2, 2, 3, 3].to_byte_slice());
+            let data = ArrayData::builder(field.data_type().clone())
+                .len(4)
+                .add_buffer(entry_offsets)
+                .add_child_data(struct_array.into_data())
+                .null_bit_buffer(Some([0b00000101].into()))
+                .build()
+                .unwrap();
+            let array = Arc::new(ListArray::from(data));
+            (array, field)
+        }
+
+        let (nested_list_array, nested_list_field) = nested_list();
+        let (nested_dict_array, nested_dict_field) = nested_dict();
+        let (nested_map_array, nested_map_field) = nested_map();
+        let (root_list_array, root_list_field) = root_list();
+
+        let schema = Schema::new(vec![
+            Field::new("date", DataType::Date32, true),
+            Field::new("null", DataType::Null, true),
+            Field::new_struct(
+                "struct",
+                vec![
+                    Arc::new(Field::new("utf8", DataType::Utf8, true)),
+                    nested_list_field.clone(),
+                    nested_dict_field.clone(),
+                    nested_map_field.clone(),
+                ],
+                true,
+            ),
+            root_list_field,
+        ]);
+
+        let arr_date32 = Date32Array::from(vec![Some(0), None, Some(1), None]);
+        let arr_null = NullArray::new(4);
+        let arr_struct = StructArray::from(vec![
+            // [{"utf8":"a"},{"utf8":null},{"utf8":null},{"utf8":"b"}]
+            (
+                Arc::new(Field::new("utf8", DataType::Utf8, true)),
+                Arc::new(StringArray::from(vec![Some("a"), None, None, 
Some("b")])) as ArrayRef,
+            ),
+            // 
[{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}]
+            (nested_list_field, nested_list_array as ArrayRef),
+            // 
[{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}]
+            (nested_dict_field, nested_dict_array as ArrayRef),
+            // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}]
+            (nested_map_field, nested_map_array as ArrayRef),
+        ]);
+
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![
+                // 
[{"date":"1970-01-01"},{"date":null},{"date":"1970-01-02"},{"date":null}]
+                Arc::new(arr_date32),
+                // [{"null":null},{"null":null},{"null":null},{"null":null}]
+                Arc::new(arr_null),
+                Arc::new(arr_struct),
+                // 
[{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{"int32":5,"utf8":null}]},{"list":null}]
+                root_list_array,
+            ],
+        )?;
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_explicit_nulls(true)
+                .build::<_, JsonArray>(&mut buf);
+            writer.write_batches(&[&batch])?;
+            writer.finish()?;
+        }
+
+        let actual = serde_json::from_slice::<Vec<Value>>(&buf).unwrap();
+        let expected = serde_json::from_value::<Vec<Value>>(json!([
+          {
+            "date": "1970-01-01",
+            "list": [
+              {
+                "int32": 1,
+                "utf8": "a"
+              },
+              {
+                "int32": null,
+                "utf8": "b"
+              }
+            ],
+            "null": null,
+            "struct": {
+              "dict": "cupcakes",
+              "list": [
+                null,
+                null,
+                null
+              ],
+              "map": {
+                "foo": 10
+              },
+              "utf8": "a"
+            }
+          },
+          {
+            "date": null,
+            "list": null,
+            "null": null,
+            "struct": {
+              "dict": null,
+              "list": [
+                1,
+                2,
+                3
+              ],
+              "map": null,
+              "utf8": null
+            }
+          },
+          {
+            "date": "1970-01-02",
+            "list": [
+              {
+                "int32": 5,
+                "utf8": null
+              }
+            ],
+            "null": null,
+            "struct": {
+              "dict": "bear",
+              "list": null,
+              "map": {},
+              "utf8": null
+            }
+          },
+          {
+            "date": null,
+            "list": null,
+            "null": null,
+            "struct": {
+              "dict": "kuma",
+              "list": [
+                null,
+                null,
+                null
+              ],
+              "map": {
+                "bar": 20,
+                "baz": 30,
+                "qux": 40
+              },
+              "utf8": "b"
+            }
+          }
+        ]))
+        .unwrap();
+
+        assert_eq!(actual, expected);
+
+        Ok(())
+    }
 }
diff --git a/arrow-json/test/data/nested_with_nulls.json 
b/arrow-json/test/data/nested_with_nulls.json
new file mode 100644
index 0000000000..932565d560
--- /dev/null
+++ b/arrow-json/test/data/nested_with_nulls.json
@@ -0,0 +1,4 @@
+{"a": null, "b": null, "c":  null, "d": {"d1":         null, "d2": [null, 1, 
2, null]}}
+{"a": null, "b": -3.5, "c":  true, "d": {"d1":         null, "d2": null}}
+{"a": null, "b": null, "c": false, "d": {"d1": "1970-01-01", "d2": null}}
+{"a":    1, "b":  2.0, "c": false, "d": {"d1":         null, "d2": null}}
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 31388bf993..b49f56c915 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -664,8 +664,6 @@ mod tests {
             .downcast_ref::<GenericListArray<Offset>>()
             .unwrap();
 
-        dbg!(&array);
-
         // verify
         let expected = GenericListArray::<Offset>::from(list_data);
         assert_eq!(&array.value(0), &expected.value(0));
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index bfe16db5cc..c73f4f50ac 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -47,7 +47,6 @@ fn test_cast_timestamp_to_string() {
     let a = TimestampMillisecondArray::from(vec![Some(864000000005), 
Some(1545696000001), None])
         .with_timezone("UTC".to_string());
     let array = Arc::new(a) as ArrayRef;
-    dbg!(&array);
     let b = cast(&array, &DataType::Utf8).unwrap();
     let c = b.as_any().downcast_ref::<StringArray>().unwrap();
     assert_eq!(&DataType::Utf8, c.data_type());
diff --git a/object_store/src/gcp/builder.rs b/object_store/src/gcp/builder.rs
index 5f718d63d9..7417ea4c8a 100644
--- a/object_store/src/gcp/builder.rs
+++ b/object_store/src/gcp/builder.rs
@@ -605,7 +605,7 @@ mod tests {
             .with_bucket_name("foo")
             .with_proxy_url("https://example.com")
             .build();
-        assert!(dbg!(gcs).is_ok());
+        assert!(gcs.is_ok());
 
         let err = GoogleCloudStorageBuilder::new()
             .with_service_account_path(service_account_path.to_str().unwrap())


Reply via email to