alamb commented on code in PR #7015:
URL: https://github.com/apache/arrow-rs/pull/7015#discussion_r1999482887


##########
arrow-json/src/writer/encoder.rs:
##########
@@ -362,42 +480,36 @@ impl<O: OffsetSizeTrait> Encoder for ListEncoder<'_, O> {
         let end = self.offsets[idx + 1].as_usize();
         let start = self.offsets[idx].as_usize();
         out.push(b'[');
-        match self.nulls.as_ref() {
-            Some(n) => (start..end).for_each(|idx| {
-                if idx != start {
-                    out.push(b',')
-                }
-                match n.is_null(idx) {
-                    true => out.extend_from_slice(b"null"),
-                    false => self.encoder.encode(idx, out),
-                }
-            }),
-            None => (start..end).for_each(|idx| {
-                if idx != start {
-                    out.push(b',')
-                }
+

Review Comment:
   I think the previous formulation is important for performance:
   1. It checks for nulls once per output list (rather than once per output 
list element)
   2. The code for the `None` case is simpler and this more likely to be auto 
vectorized



##########
arrow-json/src/writer/encoder.rs:
##########
@@ -25,126 +27,230 @@ use arrow_schema::{ArrowError, DataType, FieldRef};
 use half::f16;
 use lexical_core::FormattedSize;
 use serde::Serializer;
-use std::io::Write;
 
+/// Configuration options for the JSON encoder.

Review Comment:
   I verified that this structure is not currently publically exported and thus 
this change is not a breaking API change:
   https://docs.rs/arrow-json/54.2.1/arrow_json/index.html?search=EncoderOptions



##########
arrow-json/src/writer/mod.rs:
##########
@@ -1953,4 +1968,317 @@ mod tests {
 "#,
         );
     }
+
+    fn make_fallback_encoder_test_data() -> (RecordBatch, Arc<dyn 
EncoderFactory>) {
+        // Note: this is not intended to be an efficient implementation.
+        // Just a simple example to demonstrate how to implement a custom 
encoder.
+        #[derive(Debug)]
+        enum UnionValue {
+            Null,
+            Int32(i32),
+            String(String),
+        }
+
+        #[derive(Debug)]
+        struct UnionEncoder {

Review Comment:
   Filed a ticket to track
   - https://github.com/apache/arrow-rs/issues/7302



##########
arrow-json/src/writer/encoder.rs:
##########
@@ -25,126 +27,230 @@ use arrow_schema::{ArrowError, DataType, FieldRef};
 use half::f16;
 use lexical_core::FormattedSize;
 use serde::Serializer;
-use std::io::Write;
 
+/// Configuration options for the JSON encoder.
 #[derive(Debug, Clone, Default)]
 pub struct EncoderOptions {
-    pub explicit_nulls: bool,
-    pub struct_mode: StructMode,
+    /// Whether to include nulls in the output or elide them.
+    explicit_nulls: bool,
+    /// Whether to encode structs as JSON objects or JSON arrays of their 
values.
+    struct_mode: StructMode,
+    /// An optional hook for customizing encoding behavior.
+    encoder_factory: Option<Arc<dyn EncoderFactory>>,
+}
+
+impl EncoderOptions {
+    /// Set whether to include nulls in the output or elide them.
+    pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self {
+        self.explicit_nulls = explicit_nulls;
+        self
+    }
+
+    /// Set whether to encode structs as JSON objects or JSON arrays of their 
values.
+    pub fn with_struct_mode(mut self, struct_mode: StructMode) -> Self {
+        self.struct_mode = struct_mode;
+        self
+    }
+
+    /// Set an optional hook for customizing encoding behavior.
+    pub fn with_encoder_factory(mut self, encoder_factory: Arc<dyn 
EncoderFactory>) -> Self {
+        self.encoder_factory = Some(encoder_factory);
+        self
+    }
+
+    /// Get whether to include nulls in the output or elide them.
+    pub fn explicit_nulls(&self) -> bool {
+        self.explicit_nulls
+    }
+
+    /// Get whether to encode structs as JSON objects or JSON arrays of their 
values.
+    pub fn struct_mode(&self) -> StructMode {
+        self.struct_mode
+    }
+
+    /// Get the optional hook for customizing encoding behavior.
+    pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
+        self.encoder_factory.as_ref()
+    }
+}
+
+/// A trait to create custom encoders for specific data types.
+///
+/// This allows overriding the default encoders for specific data types,

Review Comment:
   It would be sweet to add some toy doc example of using this `EncoderFactory` 
in a doc comment



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to