This is an automated email from the ASF dual-hosted git repository.

houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new abe2cf2  fix: add LargeUtf8 support in json writer (#1358)
abe2cf2 is described below

commit abe2cf2c972afd97859d9d16d4a7fc0f9fb08bd9
Author: Tiphaine Ruy <[email protected]>
AuthorDate: Wed Feb 23 19:59:22 2022 +0100

    fix: add LargeUtf8 support in json writer (#1358)
---
 arrow/src/json/writer.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 0a96392..64504cc 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -165,6 +165,13 @@ pub fn array_to_json_array(array: &ArrayRef) -> Result<Vec<Value>> {
                 None => Value::Null,
             })
             .collect()),
+        DataType::LargeUtf8 => Ok(as_largestring_array(array)
+            .iter()
+            .map(|maybe_value| match maybe_value {
+                Some(v) => v.into(),
+                None => Value::Null,
+            })
+            .collect()),
         DataType::Int8 => primitive_array_to_json::<Int8Type>(array),
         DataType::Int16 => primitive_array_to_json::<Int16Type>(array),
         DataType::Int32 => primitive_array_to_json::<Int32Type>(array),
@@ -297,6 +304,15 @@ fn set_column_for_json_rows(
         DataType::Utf8 => {
             set_column_by_array_type!(as_string_array, col_name, rows, array, row_count);
         }
+        DataType::LargeUtf8 => {
+            set_column_by_array_type!(
+                as_largestring_array,
+                col_name,
+                rows,
+                array,
+                row_count
+            );
+        }
         DataType::Date32 => {
             set_temporal_column_by_array_type!(
                 Date32Array,
@@ -753,6 +769,37 @@ mod tests {
     }
 
     #[test]
+    fn write_large_utf8() {
+        let schema = Schema::new(vec![
+            Field::new("c1", DataType::Utf8, true),
+            Field::new("c2", DataType::LargeUtf8, true),
+        ]);
+
+        let a = StringArray::from(vec![Some("a"), None, Some("c"), Some("d"), None]);
+        let b = LargeStringArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
+
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
+                .unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+        }
+
+        assert_eq!(
+            String::from_utf8(buf).unwrap(),
+            r#"{"c1":"a","c2":"a"}
+{"c2":"b"}
+{"c1":"c"}
+{"c1":"d","c2":"d"}
+{}
+"#
+        );
+    }
+
+    #[test]
     fn write_dictionary() {
         let schema = Schema::new(vec![
             Field::new(

Reply via email to