This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1621350176 Support Utf8View in JSON serialization (#6651)
1621350176 is described below
commit 16213501769371c7d2d6ce299755337b6db4d8fd
Author: Jon Mease <[email protected]>
AuthorDate: Wed Oct 30 16:09:35 2024 -0400
Support Utf8View in JSON serialization (#6651)
---
arrow-json/src/writer/encoder.rs | 12 ++++++++++++
arrow-json/src/writer/mod.rs | 16 +++++++++++-----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index ae7d0d94e5..84ed384cfd 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -80,6 +80,10 @@ fn make_encoder_impl<'a>(
let array = array.as_string::<i64>();
(Box::new(StringEncoder(array)) as _, array.nulls().cloned())
}
+ DataType::Utf8View => {
+ let array = array.as_string_view();
+ (Box::new(StringViewEncoder(array)) as _, array.nulls().cloned())
+ }
DataType::List(_) => {
let array = array.as_list::<i32>();
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
@@ -311,6 +315,14 @@ impl<O: OffsetSizeTrait> Encoder for StringEncoder<'_, O> {
}
}
+struct StringViewEncoder<'a>(&'a StringViewArray);
+
+impl Encoder for StringViewEncoder<'_> {
+ fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+ encode_string(self.0.value(idx), out);
+ }
+}
+
struct ListEncoder<'a, O: OffsetSizeTrait> {
offsets: OffsetBuffer<O>,
nulls: Option<NullBuffer>,
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index 77e17d89f6..a37aa5ff8c 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -462,16 +462,22 @@ mod tests {
}
#[test]
- fn write_large_utf8() {
+ fn write_large_utf8_and_utf8_view() {
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, true),
Field::new("c2", DataType::LargeUtf8, true),
+ Field::new("c3", DataType::Utf8View, true),
]);
let a = StringArray::from(vec![Some("a"), None, Some("c"), Some("d"), None]);
let b = LargeStringArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
+ let c = StringViewArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
- let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
+ let batch = RecordBatch::try_new(
+ Arc::new(schema),
+ vec![Arc::new(a), Arc::new(b), Arc::new(c)],
+ )
+ .unwrap();
let mut buf = Vec::new();
{
@@ -481,10 +487,10 @@ mod tests {
assert_json_eq(
&buf,
- r#"{"c1":"a","c2":"a"}
-{"c2":"b"}
+ r#"{"c1":"a","c2":"a","c3":"a"}
+{"c2":"b","c3":"b"}
{"c1":"c"}
-{"c1":"d","c2":"d"}
+{"c1":"d","c2":"d","c3":"d"}
{}
"#,
);