This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d031cc8aa8 feat: JSON encoding of FixedSizeList (#5646)
0d031cc8aa8 is described below

commit 0d031cc8aa81296cb1bdfedea7a7cb4ec6aa54ea
Author: Trevor Hilton <[email protected]>
AuthorDate: Mon Apr 15 10:30:04 2024 -0400

    feat: JSON encoding of FixedSizeList (#5646)
    
    Added ability to encode the FixedSizeList type in JSON. The implementation
    of FixedSizeListEncoder is very similar to that of ListEncoder, but is
    somewhat simpler because every list has the same length, so each list's
    child range is computed directly from its index.
    
    A test was added to verify behaviour of the JSON encoder with and without
    explicit nulls.
---
 arrow-json/src/writer.rs         | 85 +++++++++++++++++++++++++++++++++++++++-
 arrow-json/src/writer/encoder.rs | 53 ++++++++++++++++++++++++-
 2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 85a81d98e1b..b97065989c5 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -834,7 +834,8 @@ mod tests {
     use serde_json::json;
 
     use arrow_array::builder::{
-        FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, 
StringBuilder,
+        FixedSizeBinaryBuilder, FixedSizeListBuilder, Int32Builder, 
Int64Builder, MapBuilder,
+        StringBuilder,
     };
     use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
     use arrow_data::ArrayData;
@@ -2215,4 +2216,86 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn test_writer_fixed_size_list() { // writes a nullable FixedSizeList column as JSON; checks both null modes
+        let size = 3; // number of child values in every list
+        let field = FieldRef::new(Field::new("item", DataType::Int32, true));
+        let schema = SchemaRef::new(Schema::new(vec![Field::new(
+            "list",
+            DataType::FixedSizeList(field, size),
+            true,
+        )]));
+
+        let values_builder = Int32Builder::new();
+        let mut list_builder = FixedSizeListBuilder::new(values_builder, size);
+        let lists = [ // three lists with one null element each, then a null list
+            Some([Some(1), Some(2), None]),
+            Some([Some(3), None, Some(4)]),
+            Some([None, Some(5), Some(6)]),
+            None,
+        ];
+        for list in lists {
+            match list {
+                Some(l) => {
+                    for value in l {
+                        match value {
+                            Some(v) => list_builder.values().append_value(v),
+                            None => list_builder.values().append_null(),
+                        }
+                    }
+                    list_builder.append(true); // close this slot as a non-null list
+                }
+                None => {
+                    for _ in 0..size { // pad the child values so the null slot still spans `size` entries
+                        list_builder.values().append_null();
+                    }
+                    list_builder.append(false); // close this slot as a null list
+                }
+            }
+        }
+        let array = Arc::new(list_builder.finish()) as ArrayRef;
+        let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+
+        // encode and check JSON with explicit nulls:
+        {
+            let json_value: Value = {
+                let mut buf = Vec::new();
+                let mut writer = WriterBuilder::new()
+                    .with_explicit_nulls(true)
+                    .build::<_, JsonArray>(&mut buf);
+                writer.write(&batch).unwrap();
+                writer.close().unwrap();
+                serde_json::from_slice(&buf).unwrap() // parse the written bytes back for comparison
+            };
+            assert_eq!(
+                json!([
+                    {"list": [1, 2, null]},
+                    {"list": [3, null, 4]},
+                    {"list": [null, 5, 6]},
+                    {"list": null},
+                ]),
+                json_value
+            );
+        }
+        // encode and check JSON with no explicit nulls:
+        {
+            let json_value: Value = {
+                let mut buf = Vec::new();
+                let mut writer = ArrayWriter::new(&mut buf); // writer built without the explicit-nulls option
+                writer.write(&batch).unwrap();
+                writer.close().unwrap();
+                serde_json::from_slice(&buf).unwrap()
+            };
+            assert_eq!(
+                json!([
+                    {"list": [1, 2, null]},
+                    {"list": [3, null, 4]},
+                    {"list": [null, 5, 6]},
+                    {}, // empty because nulls are omitted
+                ]),
+                json_value
+            );
+        }
+    }
 }
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index 113dc5dfc75..810e65b2268 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -88,6 +88,10 @@ fn make_encoder_impl<'a>(
             let array = array.as_list::<i64>();
             (Box::new(ListEncoder::try_new(array, options)?) as _, 
array.nulls().cloned())
         }
+        DataType::FixedSizeList(_, _) => {
+            let array = array.as_fixed_size_list();
+            (Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, 
array.nulls().cloned())
+        }
 
         DataType::Dictionary(_, _) => downcast_dictionary_array! {
             array => (Box::new(DictionaryEncoder::try_new(array, options)?) as 
_,  array.logical_nulls()),
@@ -100,7 +104,7 @@ fn make_encoder_impl<'a>(
         }
 
         DataType::FixedSizeBinary(_) => {
-            let array = 
array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
+            let array = array.as_fixed_size_binary();
             (Box::new(FixedSizeBinaryEncoder::new(array)) as _, 
array.nulls().cloned())
         }
 
@@ -329,6 +333,53 @@ impl<'a, O: OffsetSizeTrait> Encoder for ListEncoder<'a, 
O> {
     }
 }
 
+struct FixedSizeListEncoder<'a> { // JSON encoder state for one FixedSizeListArray
+    value_length: usize, // child values per list, taken from array.value_length()
+    nulls: Option<NullBuffer>, // null buffer make_encoder_impl returned for the child values
+    encoder: Box<dyn Encoder + 'a>, // encoder make_encoder_impl built for the child values
+}
+
+impl<'a> FixedSizeListEncoder<'a> {
+    fn try_new( // builds the child encoder; any error from make_encoder_impl is propagated
+        array: &'a FixedSizeListArray,
+        options: &EncoderOptions,
+    ) -> Result<Self, ArrowError> {
+        let (encoder, nulls) = make_encoder_impl(array.values().as_ref(), 
options)?; // encoder and null buffer for the child values, not for the list itself
+        Ok(Self {
+            encoder,
+            nulls,
+            value_length: array.value_length().as_usize(),
+        })
+    }
+}
+
+impl<'a> Encoder for FixedSizeListEncoder<'a> {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) { // appends list `idx` to `out` as a JSON array
+        let start = idx * self.value_length; // first child index belonging to list `idx`
+        let end = start + self.value_length; // one past the last child index
+        out.push(b'[');
+        match self.nulls.as_ref() {
+            Some(n) => (start..end).for_each(|idx| { // child null buffer present: check each element
+                if idx != start { // comma before every element except the first
+                    out.push(b',');
+                }
+                if n.is_null(idx) {
+                    out.extend_from_slice(b"null"); // null child is written as the literal `null`
+                } else {
+                    self.encoder.encode(idx, out);
+                }
+            }),
+            None => (start..end).for_each(|idx| { // no child null buffer: encode every element
+                if idx != start {
+                    out.push(b',');
+                }
+                self.encoder.encode(idx, out);
+            }),
+        }
+        out.push(b']');
+    }
+}
+
 struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
     keys: ScalarBuffer<K::Native>,
     encoder: Box<dyn Encoder + 'a>,

Reply via email to