This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 0d031cc8aa8 feat: JSON encoding of FixedSizeList (#5646)
0d031cc8aa8 is described below
commit 0d031cc8aa81296cb1bdfedea7a7cb4ec6aa54ea
Author: Trevor Hilton <[email protected]>
AuthorDate: Mon Apr 15 10:30:04 2024 -0400
feat: JSON encoding of FixedSizeList (#5646)
Added ability to encode the FixedSizeList type in JSON. The implementation
of FixedSizeListEncoder is very similar to that of ListEncoder, but is
somewhat simpler, because of the constant offset.
A test was added to verify behaviour of the JSON encoder with and without
explicit nulls.
---
arrow-json/src/writer.rs | 85 +++++++++++++++++++++++++++++++++++++++-
arrow-json/src/writer/encoder.rs | 53 ++++++++++++++++++++++++-
2 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 85a81d98e1b..b97065989c5 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -834,7 +834,8 @@ mod tests {
use serde_json::json;
use arrow_array::builder::{
- FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
+ FixedSizeBinaryBuilder, FixedSizeListBuilder, Int32Builder, Int64Builder, MapBuilder,
+ StringBuilder,
};
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::ArrayData;
@@ -2215,4 +2216,86 @@ mod tests {
);
}
}
+
+ #[test]
+ fn test_writer_fixed_size_list() {
+ let size = 3;
+ let field = FieldRef::new(Field::new("item", DataType::Int32, true));
+ let schema = SchemaRef::new(Schema::new(vec![Field::new(
+ "list",
+ DataType::FixedSizeList(field, size),
+ true,
+ )]));
+
+ let values_builder = Int32Builder::new();
+ let mut list_builder = FixedSizeListBuilder::new(values_builder, size);
+ let lists = [
+ Some([Some(1), Some(2), None]),
+ Some([Some(3), None, Some(4)]),
+ Some([None, Some(5), Some(6)]),
+ None,
+ ];
+ for list in lists {
+ match list {
+ Some(l) => {
+ for value in l {
+ match value {
+ Some(v) => list_builder.values().append_value(v),
+ None => list_builder.values().append_null(),
+ }
+ }
+ list_builder.append(true);
+ }
+ None => {
+ for _ in 0..size {
+ list_builder.values().append_null();
+ }
+ list_builder.append(false);
+ }
+ }
+ }
+ let array = Arc::new(list_builder.finish()) as ArrayRef;
+ let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+
+ //encode and check JSON with explicit nulls:
+ {
+ let json_value: Value = {
+ let mut buf = Vec::new();
+ let mut writer = WriterBuilder::new()
+ .with_explicit_nulls(true)
+ .build::<_, JsonArray>(&mut buf);
+ writer.write(&batch).unwrap();
+ writer.close().unwrap();
+ serde_json::from_slice(&buf).unwrap()
+ };
+ assert_eq!(
+ json!([
+ {"list": [1, 2, null]},
+ {"list": [3, null, 4]},
+ {"list": [null, 5, 6]},
+ {"list": null},
+ ]),
+ json_value
+ );
+ }
+ // encode and check JSON with no explicit nulls:
+ {
+ let json_value: Value = {
+ let mut buf = Vec::new();
+ let mut writer = ArrayWriter::new(&mut buf);
+ writer.write(&batch).unwrap();
+ writer.close().unwrap();
+ serde_json::from_slice(&buf).unwrap()
+ };
+ assert_eq!(
+ json!([
+ {"list": [1, 2, null]},
+ {"list": [3, null, 4]},
+ {"list": [null, 5, 6]},
+ {}, // empty because nulls are omitted
+ ]),
+ json_value
+ );
+ }
+ }
}
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index 113dc5dfc75..810e65b2268 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -88,6 +88,10 @@ fn make_encoder_impl<'a>(
let array = array.as_list::<i64>();
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}
+ DataType::FixedSizeList(_, _) => {
+ let array = array.as_fixed_size_list();
+ (Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
+ }
DataType::Dictionary(_, _) => downcast_dictionary_array! {
array => (Box::new(DictionaryEncoder::try_new(array, options)?) as _, array.logical_nulls()),
@@ -100,7 +104,7 @@ fn make_encoder_impl<'a>(
}
DataType::FixedSizeBinary(_) => {
- let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
+ let array = array.as_fixed_size_binary();
(Box::new(FixedSizeBinaryEncoder::new(array)) as _, array.nulls().cloned())
}
@@ -329,6 +333,53 @@ impl<'a, O: OffsetSizeTrait> Encoder for ListEncoder<'a, O> {
}
}
+struct FixedSizeListEncoder<'a> {
+ value_length: usize,
+ nulls: Option<NullBuffer>,
+ encoder: Box<dyn Encoder + 'a>,
+}
+
+impl<'a> FixedSizeListEncoder<'a> {
+ fn try_new(
+ array: &'a FixedSizeListArray,
+ options: &EncoderOptions,
+ ) -> Result<Self, ArrowError> {
+ let (encoder, nulls) = make_encoder_impl(array.values().as_ref(), options)?;
+ Ok(Self {
+ encoder,
+ nulls,
+ value_length: array.value_length().as_usize(),
+ })
+ }
+}
+
+impl<'a> Encoder for FixedSizeListEncoder<'a> {
+ fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+ let start = idx * self.value_length;
+ let end = start + self.value_length;
+ out.push(b'[');
+ match self.nulls.as_ref() {
+ Some(n) => (start..end).for_each(|idx| {
+ if idx != start {
+ out.push(b',');
+ }
+ if n.is_null(idx) {
+ out.extend_from_slice(b"null");
+ } else {
+ self.encoder.encode(idx, out);
+ }
+ }),
+ None => (start..end).for_each(|idx| {
+ if idx != start {
+ out.push(b',');
+ }
+ self.encoder.encode(idx, out);
+ }),
+ }
+ out.push(b']');
+ }
+}
+
struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
keys: ScalarBuffer<K::Native>,
encoder: Box<dyn Encoder + 'a>,