This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1621350176 Support Utf8View in JSON serialization (#6651)
1621350176 is described below

commit 16213501769371c7d2d6ce299755337b6db4d8fd
Author: Jon Mease <[email protected]>
AuthorDate: Wed Oct 30 16:09:35 2024 -0400

    Support Utf8View in JSON serialization (#6651)
---
 arrow-json/src/writer/encoder.rs | 12 ++++++++++++
 arrow-json/src/writer/mod.rs     | 16 +++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index ae7d0d94e5..84ed384cfd 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -80,6 +80,10 @@ fn make_encoder_impl<'a>(
             let array = array.as_string::<i64>();
             (Box::new(StringEncoder(array)) as _, array.nulls().cloned())
         }
+        DataType::Utf8View => {
+            let array = array.as_string_view();
+            (Box::new(StringViewEncoder(array)) as _, array.nulls().cloned())
+        }
         DataType::List(_) => {
             let array = array.as_list::<i32>();
             (Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
@@ -311,6 +315,14 @@ impl<O: OffsetSizeTrait> Encoder for StringEncoder<'_, O> {
     }
 }
 
+struct StringViewEncoder<'a>(&'a StringViewArray);
+
+impl Encoder for StringViewEncoder<'_> {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+        encode_string(self.0.value(idx), out);
+    }
+}
+
 struct ListEncoder<'a, O: OffsetSizeTrait> {
     offsets: OffsetBuffer<O>,
     nulls: Option<NullBuffer>,
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index 77e17d89f6..a37aa5ff8c 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -462,16 +462,22 @@ mod tests {
     }
 
     #[test]
-    fn write_large_utf8() {
+    fn write_large_utf8_and_utf8_view() {
         let schema = Schema::new(vec![
             Field::new("c1", DataType::Utf8, true),
             Field::new("c2", DataType::LargeUtf8, true),
+            Field::new("c3", DataType::Utf8View, true),
         ]);
 
         let a = StringArray::from(vec![Some("a"), None, Some("c"), Some("d"), None]);
         let b = LargeStringArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
+        let c = StringViewArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
 
-        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(a), Arc::new(b), Arc::new(c)],
+        )
+        .unwrap();
 
         let mut buf = Vec::new();
         {
@@ -481,10 +487,10 @@ mod tests {
 
         assert_json_eq(
             &buf,
-            r#"{"c1":"a","c2":"a"}
-{"c2":"b"}
+            r#"{"c1":"a","c2":"a","c3":"a"}
+{"c2":"b","c3":"b"}
 {"c1":"c"}
-{"c1":"d","c2":"d"}
+{"c1":"d","c2":"d","c3":"d"}
 {}
 "#,
         );

Reply via email to