This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fbbb61d942 Allow writing null valued keys in JSON (#5065)
fbbb61d942 is described below
commit fbbb61d94282165f9bb9f73fb4d00a3af16d4aee
Author: Jeffrey <[email protected]>
AuthorDate: Tue Nov 21 08:18:21 2023 +1100
Allow writing null valued keys in JSON (#5065)
* Allow writing null valued keys in JSON
* Trigger
* Refactor keep nulls to be runtime config
* Rename option
* Rename option
---
arrow-array/src/numeric.rs | 1 -
arrow-json/src/lib.rs | 2 +-
arrow-json/src/writer.rs | 515 ++++++++++++++++++++++++----
arrow-json/test/data/nested_with_nulls.json | 4 +
arrow/src/ffi.rs | 2 -
arrow/tests/array_cast.rs | 1 -
object_store/src/gcp/builder.rs | 2 +-
7 files changed, 461 insertions(+), 66 deletions(-)
diff --git a/arrow-array/src/numeric.rs b/arrow-array/src/numeric.rs
index ad7b3eca1d..b5e474ba69 100644
--- a/arrow-array/src/numeric.rs
+++ b/arrow-array/src/numeric.rs
@@ -618,7 +618,6 @@ mod tests {
let mask = 0b01010101_01010101_10101010_10101010;
let actual = UInt16Type::mask_from_u64(mask);
let expected = expected_mask!(i16, mask);
- dbg!(&expected);
let expected =
m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
assert_eq!(expected, actual);
diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs
index e69eaaba3e..e39882e526 100644
--- a/arrow-json/src/lib.rs
+++ b/arrow-json/src/lib.rs
@@ -82,7 +82,7 @@ pub type RawReader<R> = Reader<R>;
pub type RawReaderBuilder = ReaderBuilder;
pub use self::reader::{Reader, ReaderBuilder};
-pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer};
+pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer,
WriterBuilder};
use half::f16;
use serde_json::{Number, Value};
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 5ecfc93236..4f74817ca1 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -92,6 +92,10 @@
//! let buf = writer.into_inner();
//! assert_eq!(r#"[{"a":1},{"a":2},{"a":3}]"#, String::from_utf8(buf).unwrap())
//! ```
+//!
+//! [`LineDelimitedWriter`] and [`ArrayWriter`] will omit writing keys with
null values.
+//! In order to explicitly write null values for keys, configure a custom
[`Writer`] by
+//! using a [`WriterBuilder`] to construct a [`Writer`].
use std::iter;
use std::{fmt::Debug, io::Write};
@@ -124,6 +128,7 @@ where
fn struct_array_to_jsonmap_array(
array: &StructArray,
+ explicit_nulls: bool,
) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
let inner_col_names = array.column_names();
@@ -132,13 +137,26 @@ fn struct_array_to_jsonmap_array(
.collect::<Vec<JsonMap<String, Value>>>();
for (j, struct_col) in array.columns().iter().enumerate() {
- set_column_for_json_rows(&mut inner_objs, struct_col,
inner_col_names[j])?
+ set_column_for_json_rows(
+ &mut inner_objs,
+ struct_col,
+ inner_col_names[j],
+ explicit_nulls,
+ )?
}
Ok(inner_objs)
}
/// Converts an arrow [`Array`] into a `Vec` of Serde JSON
[`serde_json::Value`]'s
pub fn array_to_json_array(array: &dyn Array) -> Result<Vec<Value>,
ArrowError> {
+ // For backwards compatibility, default to skip nulls
+ array_to_json_array_internal(array, false)
+}
+
+fn array_to_json_array_internal(
+ array: &dyn Array,
+ explicit_nulls: bool,
+) -> Result<Vec<Value>, ArrowError> {
match array.data_type() {
DataType::Null =>
Ok(iter::repeat(Value::Null).take(array.len()).collect()),
DataType::Boolean => Ok(array
@@ -180,32 +198,44 @@ pub fn array_to_json_array(array: &dyn Array) ->
Result<Vec<Value>, ArrowError>
DataType::List(_) => as_list_array(array)
.iter()
.map(|maybe_value| match maybe_value {
- Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+ Some(v) => Ok(Value::Array(array_to_json_array_internal(
+ &v,
+ explicit_nulls,
+ )?)),
None => Ok(Value::Null),
})
.collect(),
DataType::LargeList(_) => as_large_list_array(array)
.iter()
.map(|maybe_value| match maybe_value {
- Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+ Some(v) => Ok(Value::Array(array_to_json_array_internal(
+ &v,
+ explicit_nulls,
+ )?)),
None => Ok(Value::Null),
})
.collect(),
DataType::FixedSizeList(_, _) => as_fixed_size_list_array(array)
.iter()
.map(|maybe_value| match maybe_value {
- Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+ Some(v) => Ok(Value::Array(array_to_json_array_internal(
+ &v,
+ explicit_nulls,
+ )?)),
None => Ok(Value::Null),
})
.collect(),
DataType::Struct(_) => {
- let jsonmaps = struct_array_to_jsonmap_array(array.as_struct())?;
+ let jsonmaps = struct_array_to_jsonmap_array(array.as_struct(),
explicit_nulls)?;
Ok(jsonmaps.into_iter().map(Value::Object).collect())
}
DataType::Map(_, _) => as_map_array(array)
.iter()
.map(|maybe_value| match maybe_value {
- Some(v) => Ok(Value::Array(array_to_json_array(&v)?)),
+ Some(v) => Ok(Value::Array(array_to_json_array_internal(
+ &v,
+ explicit_nulls,
+ )?)),
None => Ok(Value::Null),
})
.collect(),
@@ -216,14 +246,16 @@ pub fn array_to_json_array(array: &dyn Array) ->
Result<Vec<Value>, ArrowError>
}
macro_rules! set_column_by_array_type {
- ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident) => {
+ ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident,
$explicit_nulls:ident) => {
let arr = $cast_fn($array);
$rows
.iter_mut()
.zip(arr.iter())
.for_each(|(row, maybe_value)| {
- if let Some(v) = maybe_value {
- row.insert($col_name.to_string(), v.into());
+ if let Some(j) = maybe_value.map(Into::into) {
+ row.insert($col_name.to_string(), j);
+ } else if $explicit_nulls {
+ row.insert($col_name.to_string(), Value::Null);
}
});
};
@@ -233,6 +265,7 @@ fn set_column_by_primitive_type<T>(
rows: &mut [JsonMap<String, Value>],
array: &ArrayRef,
col_name: &str,
+ explicit_nulls: bool,
) where
T: ArrowPrimitiveType,
T::Native: JsonSerializable,
@@ -242,9 +275,10 @@ fn set_column_by_primitive_type<T>(
rows.iter_mut()
.zip(primitive_arr.iter())
.for_each(|(row, maybe_value)| {
- // when value is null, we simply skip setting the key
if let Some(j) = maybe_value.and_then(|v| v.into_json_value()) {
row.insert(col_name.to_string(), j);
+ } else if explicit_nulls {
+ row.insert(col_name.to_string(), Value::Null);
}
});
}
@@ -253,52 +287,57 @@ fn set_column_for_json_rows(
rows: &mut [JsonMap<String, Value>],
array: &ArrayRef,
col_name: &str,
+ explicit_nulls: bool,
) -> Result<(), ArrowError> {
match array.data_type() {
DataType::Int8 => {
- set_column_by_primitive_type::<Int8Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Int8Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Int16 => {
- set_column_by_primitive_type::<Int16Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Int16Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Int32 => {
- set_column_by_primitive_type::<Int32Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Int32Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Int64 => {
- set_column_by_primitive_type::<Int64Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Int64Type>(rows, array, col_name,
explicit_nulls);
}
DataType::UInt8 => {
- set_column_by_primitive_type::<UInt8Type>(rows, array, col_name);
+ set_column_by_primitive_type::<UInt8Type>(rows, array, col_name,
explicit_nulls);
}
DataType::UInt16 => {
- set_column_by_primitive_type::<UInt16Type>(rows, array, col_name);
+ set_column_by_primitive_type::<UInt16Type>(rows, array, col_name,
explicit_nulls);
}
DataType::UInt32 => {
- set_column_by_primitive_type::<UInt32Type>(rows, array, col_name);
+ set_column_by_primitive_type::<UInt32Type>(rows, array, col_name,
explicit_nulls);
}
DataType::UInt64 => {
- set_column_by_primitive_type::<UInt64Type>(rows, array, col_name);
+ set_column_by_primitive_type::<UInt64Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Float16 => {
- set_column_by_primitive_type::<Float16Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Float16Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Float32 => {
- set_column_by_primitive_type::<Float32Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Float32Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Float64 => {
- set_column_by_primitive_type::<Float64Type>(rows, array, col_name);
+ set_column_by_primitive_type::<Float64Type>(rows, array, col_name,
explicit_nulls);
}
DataType::Null => {
- // when value is null, we simply skip setting the key
+ if explicit_nulls {
+ rows.iter_mut().for_each(|row| {
+ row.insert(col_name.to_string(), Value::Null);
+ });
+ }
}
DataType::Boolean => {
- set_column_by_array_type!(as_boolean_array, col_name, rows, array);
+ set_column_by_array_type!(as_boolean_array, col_name, rows, array,
explicit_nulls);
}
DataType::Utf8 => {
- set_column_by_array_type!(as_string_array, col_name, rows, array);
+ set_column_by_array_type!(as_string_array, col_name, rows, array,
explicit_nulls);
}
DataType::LargeUtf8 => {
- set_column_by_array_type!(as_largestring_array, col_name, rows,
array);
+ set_column_by_array_type!(as_largestring_array, col_name, rows,
array, explicit_nulls);
}
DataType::Date32
| DataType::Date64
@@ -310,16 +349,19 @@ fn set_column_for_json_rows(
let formatter = ArrayFormatter::try_new(array.as_ref(), &options)?;
let nulls = array.nulls();
rows.iter_mut().enumerate().for_each(|(idx, row)| {
- if nulls.map(|x| x.is_valid(idx)).unwrap_or(true) {
- row.insert(
- col_name.to_string(),
- formatter.value(idx).to_string().into(),
- );
- }
+ let maybe_value = nulls
+ .map(|x| x.is_valid(idx))
+ .unwrap_or(true)
+ .then(|| formatter.value(idx).to_string().into());
+ if let Some(j) = maybe_value {
+ row.insert(col_name.to_string(), j);
+ } else if explicit_nulls {
+ row.insert(col_name.to_string(), Value::Null);
+ };
});
}
DataType::Struct(_) => {
- let inner_objs = struct_array_to_jsonmap_array(array.as_struct())?;
+ let inner_objs = struct_array_to_jsonmap_array(array.as_struct(),
explicit_nulls)?;
rows.iter_mut().zip(inner_objs).for_each(|(row, obj)| {
row.insert(col_name.to_string(), Value::Object(obj));
});
@@ -328,8 +370,13 @@ fn set_column_for_json_rows(
let listarr = as_list_array(array);
rows.iter_mut().zip(listarr.iter()).try_for_each(
|(row, maybe_value)| -> Result<(), ArrowError> {
- if let Some(v) = maybe_value {
- row.insert(col_name.to_string(),
Value::Array(array_to_json_array(&v)?));
+ let maybe_value = maybe_value
+ .map(|v| array_to_json_array_internal(&v,
explicit_nulls).map(Value::Array))
+ .transpose()?;
+ if let Some(j) = maybe_value {
+ row.insert(col_name.to_string(), j);
+ } else if explicit_nulls {
+ row.insert(col_name.to_string(), Value::Null);
}
Ok(())
},
@@ -339,9 +386,13 @@ fn set_column_for_json_rows(
let listarr = as_large_list_array(array);
rows.iter_mut().zip(listarr.iter()).try_for_each(
|(row, maybe_value)| -> Result<(), ArrowError> {
- if let Some(v) = maybe_value {
- let val = array_to_json_array(&v)?;
- row.insert(col_name.to_string(), Value::Array(val));
+ let maybe_value = maybe_value
+ .map(|v| array_to_json_array_internal(&v,
explicit_nulls).map(Value::Array))
+ .transpose()?;
+ if let Some(j) = maybe_value {
+ row.insert(col_name.to_string(), j);
+ } else if explicit_nulls {
+ row.insert(col_name.to_string(), Value::Null);
}
Ok(())
},
@@ -350,7 +401,7 @@ fn set_column_for_json_rows(
DataType::Dictionary(_, value_type) => {
let hydrated = arrow_cast::cast::cast(&array, value_type)
.expect("cannot cast dictionary to underlying values");
- set_column_for_json_rows(rows, &hydrated, col_name)?;
+ set_column_for_json_rows(rows, &hydrated, col_name,
explicit_nulls)?;
}
DataType::Map(_, _) => {
let maparr = as_map_array(array);
@@ -367,7 +418,7 @@ fn set_column_for_json_rows(
}
let keys = keys.as_string::<i32>();
- let values = array_to_json_array(values)?;
+ let values = array_to_json_array_internal(values, explicit_nulls)?;
let mut kv = keys.iter().zip(values);
@@ -401,6 +452,14 @@ fn set_column_for_json_rows(
/// [`JsonMap`]s (objects)
pub fn record_batches_to_json_rows(
batches: &[&RecordBatch],
+) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
+ // For backwards compatibility, default to skip nulls
+ record_batches_to_json_rows_internal(batches, false)
+}
+
+fn record_batches_to_json_rows_internal(
+ batches: &[&RecordBatch],
+ explicit_nulls: bool,
) -> Result<Vec<JsonMap<String, Value>>, ArrowError> {
let mut rows: Vec<JsonMap<String, Value>> = iter::repeat(JsonMap::new())
.take(batches.iter().map(|b| b.num_rows()).sum())
@@ -414,7 +473,7 @@ pub fn record_batches_to_json_rows(
let row_slice = &mut rows[base..base + batch.num_rows()];
for (j, col) in batch.columns().iter().enumerate() {
let col_name = schema.field(j).name();
- set_column_for_json_rows(row_slice, col, col_name)?
+ set_column_for_json_rows(row_slice, col, col_name,
explicit_nulls)?
}
base += row_count;
}
@@ -450,7 +509,9 @@ pub trait JsonFormat: Debug + Default {
}
}
-/// Produces JSON output with one record per line. For example
+/// Produces JSON output with one record per line.
+///
+/// For example:
///
/// ```json
/// {"foo":1}
@@ -467,7 +528,9 @@ impl JsonFormat for LineDelimited {
}
}
-/// Produces JSON output as a single JSON array. For example
+/// Produces JSON output as a single JSON array.
+///
+/// For example:
///
/// ```json
/// [{"foo":1},{"bar":1}]
@@ -494,16 +557,101 @@ impl JsonFormat for JsonArray {
}
}
-/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON
objects
+/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON
objects.
pub type LineDelimitedWriter<W> = Writer<W, LineDelimited>;
-/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays
+/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays.
pub type ArrayWriter<W> = Writer<W, JsonArray>;
+/// JSON writer builder.
+#[derive(Debug, Clone, Default)]
+pub struct WriterBuilder {
+ /// Controls whether null values should be written explicitly for keys
+ /// in objects, or whether the key should be omitted entirely.
+ explicit_nulls: bool,
+}
+
+impl WriterBuilder {
+ /// Create a new builder for configuring JSON writing options.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use arrow_json::{Writer, WriterBuilder};
+ /// # use arrow_json::writer::LineDelimited;
+ /// # use std::fs::File;
+ ///
+ /// fn example() -> Writer<File, LineDelimited> {
+ /// let file = File::create("target/out.json").unwrap();
+ ///
+ /// // create a builder that keeps keys with null values
+ /// let builder = WriterBuilder::new().with_explicit_nulls(true);
+ /// let writer = builder.build::<_, LineDelimited>(file);
+ ///
+ /// writer
+ /// }
+ /// ```
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Returns `true` if this builder is configured to keep keys with null
values.
+ pub fn explicit_nulls(&self) -> bool {
+ self.explicit_nulls
+ }
+
+ /// Set whether to keep keys with null values, or to omit writing them.
+ ///
+ /// For example, with [`LineDelimited`] format:
+ ///
+ /// Skip nulls (set to `false`):
+ ///
+ /// ```json
+ /// {"foo":1}
+ /// {"foo":1,"bar":2}
+ /// {}
+ /// ```
+ ///
+ /// Keep nulls (set to `true`):
+ ///
+ /// ```json
+ /// {"foo":1,"bar":null}
+ /// {"foo":1,"bar":2}
+ /// {"foo":null,"bar":null}
+ /// ```
+ ///
+ /// Default is to skip nulls (set to `false`).
+ pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self {
+ self.explicit_nulls = explicit_nulls;
+ self
+ }
+
+ /// Create a new `Writer` with specified `JsonFormat` and builder options.
+ pub fn build<W, F>(self, writer: W) -> Writer<W, F>
+ where
+ W: Write,
+ F: JsonFormat,
+ {
+ Writer {
+ writer,
+ started: false,
+ finished: false,
+ format: F::default(),
+ explicit_nulls: self.explicit_nulls,
+ }
+ }
+}
+
/// A JSON writer which serializes [`RecordBatch`]es to a stream of
-/// `u8` encoded JSON objects. See the module level documentation for
-/// detailed usage and examples. The specific format of the stream is
-/// controlled by the [`JsonFormat`] type parameter.
+/// `u8` encoded JSON objects.
+///
+/// See the module level documentation for detailed usage and examples.
+/// The specific format of the stream is controlled by the [`JsonFormat`]
+/// type parameter.
+///
+/// By default the writer will skip writing keys with null values for
+/// backward compatibility. See [`WriterBuilder`] on how to customize
+/// this behaviour when creating a new writer.
#[derive(Debug)]
pub struct Writer<W, F>
where
@@ -521,6 +669,9 @@ where
/// Determines how the byte stream is formatted
format: F,
+
+ /// Whether keys with null values should be written or skipped
+ explicit_nulls: bool,
}
impl<W, F> Writer<W, F>
@@ -535,6 +686,7 @@ where
started: false,
finished: false,
format: F::default(),
+ explicit_nulls: false,
}
}
@@ -556,7 +708,7 @@ where
/// Convert the `RecordBatch` into JSON rows, and write them to the output
pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
- for row in record_batches_to_json_rows(&[batch])? {
+ for row in record_batches_to_json_rows_internal(&[batch],
self.explicit_nulls)? {
self.write_row(&Value::Object(row))?;
}
Ok(())
@@ -564,7 +716,7 @@ where
/// Convert the [`RecordBatch`] into JSON rows, and write them to the
output
pub fn write_batches(&mut self, batches: &[&RecordBatch]) -> Result<(),
ArrowError> {
- for row in record_batches_to_json_rows(batches)? {
+ for row in record_batches_to_json_rows_internal(batches,
self.explicit_nulls)? {
self.write_row(&Value::Object(row))?;
}
Ok(())
@@ -609,7 +761,7 @@ mod tests {
use serde_json::json;
- use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
+ use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder,
StringBuilder};
use arrow_buffer::{Buffer, ToByteSlice};
use arrow_data::ArrayData;
@@ -1203,7 +1355,7 @@ mod tests {
);
}
- fn test_write_for_file(test_file: &str) {
+ fn test_write_for_file(test_file: &str, remove_nulls: bool) {
let file = File::open(test_file).unwrap();
let mut reader = BufReader::new(file);
let (schema, _) = infer_json_schema(&mut reader, None).unwrap();
@@ -1215,18 +1367,27 @@ mod tests {
let mut buf = Vec::new();
{
- let mut writer = LineDelimitedWriter::new(&mut buf);
- writer.write_batches(&[&batch]).unwrap();
+ if remove_nulls {
+ let mut writer = LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ } else {
+ let mut writer = WriterBuilder::new()
+ .with_explicit_nulls(true)
+ .build::<_, LineDelimited>(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
}
let result = String::from_utf8(buf).unwrap();
let expected = read_to_string(test_file).unwrap();
for (r, e) in result.lines().zip(expected.lines()) {
let mut expected_json = serde_json::from_str::<Value>(e).unwrap();
- // remove null value from object to make comparison consistent:
- if let Value::Object(obj) = expected_json {
- expected_json =
- Value::Object(obj.into_iter().filter(|(_, v)| *v !=
Value::Null).collect());
+ if remove_nulls {
+ // remove null value from object to make comparison consistent:
+ if let Value::Object(obj) = expected_json {
+ expected_json =
+ Value::Object(obj.into_iter().filter(|(_, v)| *v !=
Value::Null).collect());
+ }
}
assert_eq!(serde_json::from_str::<Value>(r).unwrap(),
expected_json,);
}
@@ -1234,17 +1395,22 @@ mod tests {
#[test]
fn write_basic_rows() {
- test_write_for_file("test/data/basic.json");
+ test_write_for_file("test/data/basic.json", true);
}
#[test]
fn write_arrays() {
- test_write_for_file("test/data/arrays.json");
+ test_write_for_file("test/data/arrays.json", true);
}
#[test]
fn write_basic_nulls() {
- test_write_for_file("test/data/basic_nulls.json");
+ test_write_for_file("test/data/basic_nulls.json", true);
+ }
+
+ #[test]
+ fn write_nested_with_nulls() {
+ test_write_for_file("test/data/nested_with_nulls.json", false);
}
#[test]
@@ -1530,4 +1696,233 @@ mod tests {
assert_eq!(array_to_json_array(&map_array).unwrap(), expected_json);
}
+
+ #[test]
+ fn test_writer_explicit_nulls() -> Result<(), ArrowError> {
+ fn nested_list() -> (Arc<ListArray>, Arc<Field>) {
+ let array = Arc::new(ListArray::from_iter_primitive::<Int32Type,
_, _>(vec![
+ Some(vec![None, None, None]),
+ Some(vec![Some(1), Some(2), Some(3)]),
+ None,
+ Some(vec![None, None, None]),
+ ]));
+ let field = Arc::new(Field::new("list", array.data_type().clone(),
true));
+ //
[{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}]
+ (array, field)
+ }
+
+ fn nested_dict() -> (Arc<DictionaryArray<Int32Type>>, Arc<Field>) {
+ let array = Arc::new(DictionaryArray::from_iter(vec![
+ Some("cupcakes"),
+ None,
+ Some("bear"),
+ Some("kuma"),
+ ]));
+ let field = Arc::new(Field::new("dict", array.data_type().clone(),
true));
+ //
[{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}]
+ (array, field)
+ }
+
+ fn nested_map() -> (Arc<MapArray>, Arc<Field>) {
+ let string_builder = StringBuilder::new();
+ let int_builder = Int64Builder::new();
+ let mut builder = MapBuilder::new(None, string_builder,
int_builder);
+
+ // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}]
+ builder.keys().append_value("foo");
+ builder.values().append_value(10);
+ builder.append(true).unwrap();
+
+ builder.append(false).unwrap();
+
+ builder.append(true).unwrap();
+
+ builder.keys().append_value("bar");
+ builder.values().append_value(20);
+ builder.keys().append_value("baz");
+ builder.values().append_value(30);
+ builder.keys().append_value("qux");
+ builder.values().append_value(40);
+ builder.append(true).unwrap();
+
+ let array = Arc::new(builder.finish());
+ let field = Arc::new(Field::new("map", array.data_type().clone(),
true));
+ (array, field)
+ }
+
+ fn root_list() -> (Arc<ListArray>, Field) {
+ let struct_array = StructArray::from(vec![
+ (
+ Arc::new(Field::new("utf8", DataType::Utf8, true)),
+ Arc::new(StringArray::from(vec![Some("a"), Some("b"),
None, None])) as ArrayRef,
+ ),
+ (
+ Arc::new(Field::new("int32", DataType::Int32, true)),
+ Arc::new(Int32Array::from(vec![Some(1), None, Some(5),
None])) as ArrayRef,
+ ),
+ ]);
+
+ let field = Field::new_list(
+ "list",
+ Field::new("struct", struct_array.data_type().clone(), true),
+ true,
+ );
+
+ //
[{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{"int32":5,"utf8":null}]},{"list":null}]
+ let entry_offsets = Buffer::from(&[0, 2, 2, 3, 3].to_byte_slice());
+ let data = ArrayData::builder(field.data_type().clone())
+ .len(4)
+ .add_buffer(entry_offsets)
+ .add_child_data(struct_array.into_data())
+ .null_bit_buffer(Some([0b00000101].into()))
+ .build()
+ .unwrap();
+ let array = Arc::new(ListArray::from(data));
+ (array, field)
+ }
+
+ let (nested_list_array, nested_list_field) = nested_list();
+ let (nested_dict_array, nested_dict_field) = nested_dict();
+ let (nested_map_array, nested_map_field) = nested_map();
+ let (root_list_array, root_list_field) = root_list();
+
+ let schema = Schema::new(vec![
+ Field::new("date", DataType::Date32, true),
+ Field::new("null", DataType::Null, true),
+ Field::new_struct(
+ "struct",
+ vec![
+ Arc::new(Field::new("utf8", DataType::Utf8, true)),
+ nested_list_field.clone(),
+ nested_dict_field.clone(),
+ nested_map_field.clone(),
+ ],
+ true,
+ ),
+ root_list_field,
+ ]);
+
+ let arr_date32 = Date32Array::from(vec![Some(0), None, Some(1), None]);
+ let arr_null = NullArray::new(4);
+ let arr_struct = StructArray::from(vec![
+ // [{"utf8":"a"},{"utf8":null},{"utf8":null},{"utf8":"b"}]
+ (
+ Arc::new(Field::new("utf8", DataType::Utf8, true)),
+ Arc::new(StringArray::from(vec![Some("a"), None, None,
Some("b")])) as ArrayRef,
+ ),
+ //
[{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}]
+ (nested_list_field, nested_list_array as ArrayRef),
+ //
[{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}]
+ (nested_dict_field, nested_dict_array as ArrayRef),
+ // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}]
+ (nested_map_field, nested_map_array as ArrayRef),
+ ]);
+
+ let batch = RecordBatch::try_new(
+ Arc::new(schema),
+ vec![
+ //
[{"date":"1970-01-01"},{"date":null},{"date":"1970-01-02"},{"date":null}]
+ Arc::new(arr_date32),
+ // [{"null":null},{"null":null},{"null":null},{"null":null}]
+ Arc::new(arr_null),
+ Arc::new(arr_struct),
+ //
[{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{"int32":5,"utf8":null}]},{"list":null}]
+ root_list_array,
+ ],
+ )?;
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = WriterBuilder::new()
+ .with_explicit_nulls(true)
+ .build::<_, JsonArray>(&mut buf);
+ writer.write_batches(&[&batch])?;
+ writer.finish()?;
+ }
+
+ let actual = serde_json::from_slice::<Vec<Value>>(&buf).unwrap();
+ let expected = serde_json::from_value::<Vec<Value>>(json!([
+ {
+ "date": "1970-01-01",
+ "list": [
+ {
+ "int32": 1,
+ "utf8": "a"
+ },
+ {
+ "int32": null,
+ "utf8": "b"
+ }
+ ],
+ "null": null,
+ "struct": {
+ "dict": "cupcakes",
+ "list": [
+ null,
+ null,
+ null
+ ],
+ "map": {
+ "foo": 10
+ },
+ "utf8": "a"
+ }
+ },
+ {
+ "date": null,
+ "list": null,
+ "null": null,
+ "struct": {
+ "dict": null,
+ "list": [
+ 1,
+ 2,
+ 3
+ ],
+ "map": null,
+ "utf8": null
+ }
+ },
+ {
+ "date": "1970-01-02",
+ "list": [
+ {
+ "int32": 5,
+ "utf8": null
+ }
+ ],
+ "null": null,
+ "struct": {
+ "dict": "bear",
+ "list": null,
+ "map": {},
+ "utf8": null
+ }
+ },
+ {
+ "date": null,
+ "list": null,
+ "null": null,
+ "struct": {
+ "dict": "kuma",
+ "list": [
+ null,
+ null,
+ null
+ ],
+ "map": {
+ "bar": 20,
+ "baz": 30,
+ "qux": 40
+ },
+ "utf8": "b"
+ }
+ }
+ ]))
+ .unwrap();
+
+ assert_eq!(actual, expected);
+
+ Ok(())
+ }
}
diff --git a/arrow-json/test/data/nested_with_nulls.json
b/arrow-json/test/data/nested_with_nulls.json
new file mode 100644
index 0000000000..932565d560
--- /dev/null
+++ b/arrow-json/test/data/nested_with_nulls.json
@@ -0,0 +1,4 @@
+{"a": null, "b": null, "c": null, "d": {"d1": null, "d2": [null, 1,
2, null]}}
+{"a": null, "b": -3.5, "c": true, "d": {"d1": null, "d2": null}}
+{"a": null, "b": null, "c": false, "d": {"d1": "1970-01-01", "d2": null}}
+{"a": 1, "b": 2.0, "c": false, "d": {"d1": null, "d2": null}}
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 31388bf993..b49f56c915 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -664,8 +664,6 @@ mod tests {
.downcast_ref::<GenericListArray<Offset>>()
.unwrap();
- dbg!(&array);
-
// verify
let expected = GenericListArray::<Offset>::from(list_data);
assert_eq!(&array.value(0), &expected.value(0));
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index bfe16db5cc..c73f4f50ac 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -47,7 +47,6 @@ fn test_cast_timestamp_to_string() {
let a = TimestampMillisecondArray::from(vec![Some(864000000005),
Some(1545696000001), None])
.with_timezone("UTC".to_string());
let array = Arc::new(a) as ArrayRef;
- dbg!(&array);
let b = cast(&array, &DataType::Utf8).unwrap();
let c = b.as_any().downcast_ref::<StringArray>().unwrap();
assert_eq!(&DataType::Utf8, c.data_type());
diff --git a/object_store/src/gcp/builder.rs b/object_store/src/gcp/builder.rs
index 5f718d63d9..7417ea4c8a 100644
--- a/object_store/src/gcp/builder.rs
+++ b/object_store/src/gcp/builder.rs
@@ -605,7 +605,7 @@ mod tests {
.with_bucket_name("foo")
.with_proxy_url("https://example.com")
.build();
- assert!(dbg!(gcs).is_ok());
+ assert!(gcs.is_ok());
let err = GoogleCloudStorageBuilder::new()
.with_service_account_path(service_account_path.to_str().unwrap())