This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 48246fff71 Add tests for serializing list of dictionary encoded values
to json (#5399)
48246fff71 is described below
commit 48246fff71b48813c93c73f7cce02f92d29df389
Author: Jörn Horstmann <[email protected]>
AuthorDate: Wed Feb 14 20:23:30 2024 +0100
Add tests for serializing list of dictionary encoded values to json (#5399)
---
arrow-json/src/writer.rs | 89 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 8409f2e557..bb494b5950 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -833,7 +833,7 @@ mod tests {
use serde_json::json;
use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder,
StringBuilder};
- use arrow_buffer::{Buffer, ToByteSlice};
+ use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::ArrayData;
use crate::reader::*;
@@ -953,6 +953,93 @@ mod tests {
);
}
+ #[test]
+ fn write_list_of_dictionary() { // large list of Int32-keyed Utf8 dictionary values, written as line-delimited JSON
+ let dict_field = Arc::new(Field::new_dictionary(
+ "item",
+ DataType::Int32,
+ DataType::Utf8,
+ true,
+ ));
+ let schema = Schema::new(vec![Field::new_large_list("l",
dict_field.clone(), true)]);
+ // Six dictionary-encoded child values; the fifth one is null.
+ let dict_array: DictionaryArray<Int32Type> =
+ vec![Some("a"), Some("b"), Some("c"), Some("a"), None, Some("c")]
+ .into_iter()
+ .collect();
+ let list_array = LargeListArray::try_new(
+ dict_field,
+ OffsetBuffer::from_lengths([3_usize, 2, 0, 1]), // four lists of lengths 3, 2, 0 and 1
+ Arc::new(dict_array),
+ Some(NullBuffer::from_iter([true, true, false, true])), // `false` marks the third list as null
+ )
+ .unwrap();
+ // The list array becomes the only column of the batch, under the schema's field "l".
+ let batch = RecordBatch::try_new(Arc::new(schema),
vec![Arc::new(list_array)]).unwrap();
+ // The writer mutably borrows `buf`; the inner scope ends that borrow before `buf` is read.
+ let mut buf = Vec::new();
+ {
+ let mut writer = LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+ // Expected: one JSON line per list; the null child is `null`, the null list row is `{}`.
+ assert_json_eq(
+ &buf,
+ r#"{"l":["a","b","c"]}
+{"l":["a",null]}
+{}
+{"l":["c"]}
+"#,
+ );
+ }
+
+ #[test]
+ fn write_list_of_dictionary_large_values() { // large list of Int32-keyed LargeUtf8 dictionary values, written as line-delimited JSON
+ let dict_field = Arc::new(Field::new_dictionary(
+ "item",
+ DataType::Int32,
+ DataType::LargeUtf8,
+ true,
+ ));
+ let schema = Schema::new(vec![Field::new_large_list("l",
dict_field.clone(), true)]);
+ // Explicit dictionary keys into `values` below; the fifth key is null.
+ let keys = PrimitiveArray::<Int32Type>::from(vec![
+ Some(0),
+ Some(1),
+ Some(2),
+ Some(0),
+ None,
+ Some(2),
+ ]);
+ let values = LargeStringArray::from(vec!["a", "b", "c"]); // the three dictionary values the keys refer to
+ let dict_array = DictionaryArray::try_new(keys,
Arc::new(values)).unwrap();
+ // Group the six dictionary entries into four lists.
+ let list_array = LargeListArray::try_new(
+ dict_field,
+ OffsetBuffer::from_lengths([3_usize, 2, 0, 1]), // list lengths 3, 2, 0 and 1
+ Arc::new(dict_array),
+ Some(NullBuffer::from_iter([true, true, false, true])), // `false` marks the third list as null
+ )
+ .unwrap();
+ // The list array becomes the only column of the batch, under the schema's field "l".
+ let batch = RecordBatch::try_new(Arc::new(schema),
vec![Arc::new(list_array)]).unwrap();
+ // The writer mutably borrows `buf`; the inner scope ends that borrow before `buf` is read.
+ let mut buf = Vec::new();
+ {
+ let mut writer = LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[&batch]).unwrap();
+ }
+ // Expected: one JSON line per list; the null key is `null`, the null list row is `{}`.
+ assert_json_eq(
+ &buf,
+ r#"{"l":["a","b","c"]}
+{"l":["a",null]}
+{}
+{"l":["c"]}
+"#,
+ );
+ }
+
#[test]
fn write_timestamps() {
let ts_string = "2018-11-13T17:11:10.011375885995";