nathanielc commented on issue #5733:
URL: https://github.com/apache/arrow-rs/issues/5733#issuecomment-2628171374
Ok, so it turns out that the JSON functions don't use the union as I was
expecting. They still encode objects directly as strings, so encoding the union
directly does not solve my use case. Here is my WIP diff of the changes:
```patch
diff --git a/arrow-json/src/writer/encoder.rs
b/arrow-json/src/writer/encoder.rs
index ed430fe6a..876c499ab 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -24,7 +24,7 @@ use arrow_schema::{ArrowError, DataType, FieldRef};
use half::f16;
use lexical_core::FormattedSize;
use serde::Serializer;
-use std::io::Write;
+use std::{collections::BTreeMap, io::Write};
#[derive(Debug, Clone, Default)]
pub struct EncoderOptions {
@@ -138,6 +138,26 @@ fn make_encoder_impl<'a>(
};
(Box::new(encoder) as _, array.nulls().cloned())
}
+ DataType::Union(fields,_) => {
+ let array = array.as_union();
+ let encoders = fields.iter().map(|(type_id, field )| {
+ let (encoder, nulls) =
make_encoder_impl(array.child(type_id), options)?;
+ Ok((
+ type_id,
+ FieldEncoder {
+ field: field.clone(),
+ encoder,
+ nulls
+ }
+ ))
+ }).collect::<Result<BTreeMap<_,_>, ArrowError>>()?;
+
+ let encoder = UnionArrayEncoder{
+ array,
+ encoders,
+ };
+ (Box::new(encoder) as _, array.nulls().cloned())
+ }
DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
let options = FormatOptions::new().with_display_error(true);
let formatter = ArrayFormatter::try_new(array, &options)?;
@@ -210,6 +230,20 @@ impl Encoder for StructArrayEncoder<'_> {
}
}
+struct UnionArrayEncoder<'a> {
+ array: &'a UnionArray,
+ encoders: BTreeMap<i8, FieldEncoder<'a>>,
+}
+
+impl<'a> Encoder for UnionArrayEncoder<'a> {
+ fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+ let type_id = self.array.type_id(idx);
+ self.encoders
+ .get_mut(&type_id)
+ .map(|f| f.encoder.encode(idx, out));
+ }
+}
+
trait PrimitiveEncode: ArrowNativeType {
type Buffer;
```
It's a pretty simple change if someone wants to take it further. Given #7015,
however, its usefulness is likely diminished.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]