This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new abe2cf2 fix: add LargeUtf8 support in json writer (#1358)
abe2cf2 is described below
commit abe2cf2c972afd97859d9d16d4a7fc0f9fb08bd9
Author: Tiphaine Ruy <[email protected]>
AuthorDate: Wed Feb 23 19:59:22 2022 +0100
fix: add LargeUtf8 support in json writer (#1358)
---
arrow/src/json/writer.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 0a96392..64504cc 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -165,6 +165,13 @@ pub fn array_to_json_array(array: &ArrayRef) -> Result<Vec<Value>> {
None => Value::Null,
})
.collect()),
+ DataType::LargeUtf8 => Ok(as_largestring_array(array)
+ .iter()
+ .map(|maybe_value| match maybe_value {
+ Some(v) => v.into(),
+ None => Value::Null,
+ })
+ .collect()),
DataType::Int8 => primitive_array_to_json::<Int8Type>(array),
DataType::Int16 => primitive_array_to_json::<Int16Type>(array),
DataType::Int32 => primitive_array_to_json::<Int32Type>(array),
@@ -297,6 +304,15 @@ fn set_column_for_json_rows(
DataType::Utf8 => {
set_column_by_array_type!(as_string_array, col_name, rows, array, row_count);
}
+ DataType::LargeUtf8 => {
+ set_column_by_array_type!(
+ as_largestring_array,
+ col_name,
+ rows,
+ array,
+ row_count
+ );
+ }
DataType::Date32 => {
set_temporal_column_by_array_type!(
Date32Array,
@@ -753,6 +769,37 @@ mod tests {
}
#[test]
+ fn write_large_utf8() {
+ let schema = Schema::new(vec![
+ Field::new("c1", DataType::Utf8, true),
+ Field::new("c2", DataType::LargeUtf8, true),
+ ]);
+
+ let a = StringArray::from(vec![Some("a"), None, Some("c"), Some("d"), None]);
+ let b = LargeStringArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
+
+ let batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
+ .unwrap();
+
+ let mut buf = Vec::new();
+ {
+ let mut writer = LineDelimitedWriter::new(&mut buf);
+ writer.write_batches(&[batch]).unwrap();
+ }
+
+ assert_eq!(
+ String::from_utf8(buf).unwrap(),
+ r#"{"c1":"a","c2":"a"}
+{"c2":"b"}
+{"c1":"c"}
+{"c1":"d","c2":"d"}
+{}
+"#
+ );
+ }
+
+ #[test]
fn write_dictionary() {
let schema = Schema::new(vec![
Field::new(