This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 22cf1ba998 Factor out json reader's static make_decoder args to a 
struct (#9271)
22cf1ba998 is described below

commit 22cf1ba998bbf979888bda79f33f180b4ea19aba
Author: Ryan Johnson <[email protected]>
AuthorDate: Mon Jan 26 15:05:47 2026 -0700

    Factor out json reader's static make_decoder args to a struct (#9271)
    
    # Which issue does this PR close?
    
    Part of https://github.com/apache/arrow-rs/issues/8987
    
    # Rationale for this change
    
    Supporting custom type decoders in JSON reads requires some ability to
    create nested and/or delegating decoder factories. For example:
    1. Quirks-mode decoding of a complex type that attempts to parse string
    inputs to the requested type
       * Falls back to the default decoder for non-string inputs
    2. Convert complex type mismatches to NULL instead of error
       * Use the default decoder only if the input type matches
    3. Compound decoder factories, e.g.
    * Support multiple extension types, each provided by their own factory
    * Support a mix of "standard" extension type decoders and custom "quicks
    mode" decoders
    
    Such use cases mean decoder factories will need access to the
    functionality currently provided by `make_decoder`, and the latter will
    need to be aware of the top-level decoder factory in use, even if that
    decoder factory has delegated to a more specific decoder factory who
    would actually call `make_decoder`.
    
    # What changes are included in this PR?
    
    Create a new `DecoderContext` struct that bundles up the top-level args
    that are passed unchanged through the entire schema traversal that
    `make_decoder` performs. It reduces the arg count, and also gives a
    place to store the decoder factory that `make_decoder` should eventually
    know how to use, without churning function signatures a second time. It
    will also help insulate the signature of the private `make_decoder` API
    from the decoder factory API that will become public.
    
    # Are these changes tested?
    
    Existing unit tests validate the refactor.
    
    # Are there any user-facing changes?
    
    No. All methods and data types involved are private to the arrow-json
    crate.
---
 arrow-json/src/reader/list_array.rs   | 15 ++------
 arrow-json/src/reader/map_array.rs    | 23 +++---------
 arrow-json/src/reader/mod.rs          | 68 +++++++++++++++++++++++++++--------
 arrow-json/src/reader/struct_array.rs | 47 ++++++++++--------------
 4 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/arrow-json/src/reader/list_array.rs 
b/arrow-json/src/reader/list_array.rs
index e74fef7917..81fe4e3ffa 100644
--- a/arrow-json/src/reader/list_array.rs
+++ b/arrow-json/src/reader/list_array.rs
@@ -15,9 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::StructMode;
 use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{ArrayDecoder, make_decoder};
+use crate::reader::{ArrayDecoder, DecoderContext};
 use arrow_array::OffsetSizeTrait;
 use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
 use arrow_buffer::buffer::NullBuffer;
@@ -34,24 +33,16 @@ pub struct ListArrayDecoder<O> {
 
 impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
     pub fn new(
+        ctx: &DecoderContext,
         data_type: DataType,
-        coerce_primitive: bool,
-        strict_mode: bool,
         is_nullable: bool,
-        struct_mode: StructMode,
     ) -> Result<Self, ArrowError> {
         let field = match &data_type {
             DataType::List(f) if !O::IS_LARGE => f,
             DataType::LargeList(f) if O::IS_LARGE => f,
             _ => unreachable!(),
         };
-        let decoder = make_decoder(
-            field.data_type().clone(),
-            coerce_primitive,
-            strict_mode,
-            field.is_nullable(),
-            struct_mode,
-        )?;
+        let decoder = ctx.make_decoder(field.data_type().clone(), 
field.is_nullable())?;
 
         Ok(Self {
             data_type,
diff --git a/arrow-json/src/reader/map_array.rs 
b/arrow-json/src/reader/map_array.rs
index c2068577a0..3c19cb3eb5 100644
--- a/arrow-json/src/reader/map_array.rs
+++ b/arrow-json/src/reader/map_array.rs
@@ -15,9 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::StructMode;
 use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{ArrayDecoder, make_decoder};
+use crate::reader::{ArrayDecoder, DecoderContext};
 use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
 use arrow_buffer::ArrowNativeType;
 use arrow_buffer::buffer::NullBuffer;
@@ -33,11 +32,9 @@ pub struct MapArrayDecoder {
 
 impl MapArrayDecoder {
     pub fn new(
+        ctx: &DecoderContext,
         data_type: DataType,
-        coerce_primitive: bool,
-        strict_mode: bool,
         is_nullable: bool,
-        struct_mode: StructMode,
     ) -> Result<Self, ArrowError> {
         let fields = match &data_type {
             DataType::Map(_, true) => {
@@ -56,20 +53,8 @@ impl MapArrayDecoder {
             _ => unreachable!(),
         };
 
-        let keys = make_decoder(
-            fields[0].data_type().clone(),
-            coerce_primitive,
-            strict_mode,
-            fields[0].is_nullable(),
-            struct_mode,
-        )?;
-        let values = make_decoder(
-            fields[1].data_type().clone(),
-            coerce_primitive,
-            strict_mode,
-            fields[1].is_nullable(),
-            struct_mode,
-        )?;
+        let keys = ctx.make_decoder(fields[0].data_type().clone(), 
fields[0].is_nullable())?;
+        let values = ctx.make_decoder(fields[1].data_type().clone(), 
fields[1].is_nullable())?;
 
         Ok(Self {
             data_type,
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index f5fd1a8e7c..6b3ab0097f 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -303,13 +303,12 @@ impl ReaderBuilder {
             }
         };
 
-        let decoder = make_decoder(
-            data_type,
-            self.coerce_primitive,
-            self.strict_mode,
-            nullable,
-            self.struct_mode,
-        )?;
+        let ctx = DecoderContext {
+            coerce_primitive: self.coerce_primitive,
+            strict_mode: self.strict_mode,
+            struct_mode: self.struct_mode,
+        };
+        let decoder = ctx.make_decoder(data_type, nullable)?;
 
         let num_fields = self.schema.flattened_fields().len();
 
@@ -679,6 +678,48 @@ trait ArrayDecoder: Send {
     fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, 
ArrowError>;
 }
 
+/// Context for decoder creation, containing configuration.
+///
+/// This context is passed through the decoder creation process and contains
+/// all the configuration needed to create decoders recursively.
+pub struct DecoderContext {
+    /// Whether to coerce primitives to strings
+    coerce_primitive: bool,
+    /// Whether to validate struct fields strictly
+    strict_mode: bool,
+    /// How to decode struct fields
+    struct_mode: StructMode,
+}
+
+impl DecoderContext {
+    /// Returns whether to coerce primitive types (e.g., number to string)
+    pub fn coerce_primitive(&self) -> bool {
+        self.coerce_primitive
+    }
+
+    /// Returns whether to validate struct fields strictly
+    pub fn strict_mode(&self) -> bool {
+        self.strict_mode
+    }
+
+    /// Returns how to decode struct fields
+    pub fn struct_mode(&self) -> StructMode {
+        self.struct_mode
+    }
+
+    /// Create a decoder for a type.
+    ///
+    /// This is the standard way to create child decoders from within a decoder
+    /// implementation.
+    fn make_decoder(
+        &self,
+        data_type: DataType,
+        is_nullable: bool,
+    ) -> Result<Box<dyn ArrayDecoder>, ArrowError> {
+        make_decoder(self, data_type, is_nullable)
+    }
+}
+
 macro_rules! primitive_decoder {
     ($t:ty, $data_type:expr) => {
         Ok(Box::new(PrimitiveArrayDecoder::<$t>::new($data_type)))
@@ -686,12 +727,11 @@ macro_rules! primitive_decoder {
 }
 
 fn make_decoder(
+    ctx: &DecoderContext,
     data_type: DataType,
-    coerce_primitive: bool,
-    strict_mode: bool,
     is_nullable: bool,
-    struct_mode: StructMode,
 ) -> Result<Box<dyn ArrayDecoder>, ArrowError> {
+    let coerce_primitive = ctx.coerce_primitive();
     downcast_integer! {
         data_type => (primitive_decoder, data_type),
         DataType::Null => Ok(Box::<NullArrayDecoder>::default()),
@@ -744,14 +784,14 @@ fn make_decoder(
         DataType::Utf8 => 
Ok(Box::new(StringArrayDecoder::<i32>::new(coerce_primitive))),
         DataType::Utf8View => 
Ok(Box::new(StringViewArrayDecoder::new(coerce_primitive))),
         DataType::LargeUtf8 => 
Ok(Box::new(StringArrayDecoder::<i64>::new(coerce_primitive))),
-        DataType::List(_) => 
Ok(Box::new(ListArrayDecoder::<i32>::new(data_type, coerce_primitive, 
strict_mode, is_nullable, struct_mode)?)),
-        DataType::LargeList(_) => 
Ok(Box::new(ListArrayDecoder::<i64>::new(data_type, coerce_primitive, 
strict_mode, is_nullable, struct_mode)?)),
-        DataType::Struct(_) => Ok(Box::new(StructArrayDecoder::new(data_type, 
coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
+        DataType::List(_) => Ok(Box::new(ListArrayDecoder::<i32>::new(ctx, 
data_type, is_nullable)?)),
+        DataType::LargeList(_) => 
Ok(Box::new(ListArrayDecoder::<i64>::new(ctx, data_type, is_nullable)?)),
+        DataType::Struct(_) => Ok(Box::new(StructArrayDecoder::new(ctx, 
data_type, is_nullable)?)),
         DataType::Binary => Ok(Box::new(BinaryArrayDecoder::<i32>::default())),
         DataType::LargeBinary => 
Ok(Box::new(BinaryArrayDecoder::<i64>::default())),
         DataType::FixedSizeBinary(len) => 
Ok(Box::new(FixedSizeBinaryArrayDecoder::new(len))),
         DataType::BinaryView => Ok(Box::new(BinaryViewDecoder::default())),
-        DataType::Map(_, _) => Ok(Box::new(MapArrayDecoder::new(data_type, 
coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
+        DataType::Map(_, _) => Ok(Box::new(MapArrayDecoder::new(ctx, 
data_type, is_nullable)?)),
         d => Err(ArrowError::NotYetImplemented(format!("Support for {d} in 
JSON reader")))
     }
 }
diff --git a/arrow-json/src/reader/struct_array.rs 
b/arrow-json/src/reader/struct_array.rs
index df0d5b8a5b..915c0607fd 100644
--- a/arrow-json/src/reader/struct_array.rs
+++ b/arrow-json/src/reader/struct_array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{ArrayDecoder, StructMode, make_decoder};
+use crate::reader::{ArrayDecoder, DecoderContext, StructMode};
 use arrow_array::builder::BooleanBufferBuilder;
 use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
@@ -80,42 +80,31 @@ pub struct StructArrayDecoder {
 
 impl StructArrayDecoder {
     pub fn new(
+        ctx: &DecoderContext,
         data_type: DataType,
-        coerce_primitive: bool,
-        strict_mode: bool,
         is_nullable: bool,
-        struct_mode: StructMode,
     ) -> Result<Self, ArrowError> {
-        let (decoders, field_name_to_index) = {
-            let fields = struct_fields(&data_type);
-            let decoders = fields
-                .iter()
-                .map(|f| {
-                    // If this struct nullable, need to permit nullability in 
child array
-                    // StructArrayDecoder::decode verifies that if the child 
is not nullable
-                    // it doesn't contain any nulls not masked by its parent
-                    let nullable = f.is_nullable() || is_nullable;
-                    make_decoder(
-                        f.data_type().clone(),
-                        coerce_primitive,
-                        strict_mode,
-                        nullable,
-                        struct_mode,
-                    )
-                })
-                .collect::<Result<Vec<_>, ArrowError>>()?;
-            let field_name_to_index = if struct_mode == StructMode::ObjectOnly 
{
-                build_field_index(fields)
-            } else {
-                None
-            };
-            (decoders, field_name_to_index)
+        let struct_mode = ctx.struct_mode();
+        let fields = struct_fields(&data_type);
+
+        // If this struct nullable, need to permit nullability in child array
+        // StructArrayDecoder::decode verifies that if the child is not 
nullable
+        // it doesn't contain any nulls not masked by its parent
+        let decoders = fields
+            .iter()
+            .map(|f| ctx.make_decoder(f.data_type().clone(), f.is_nullable() 
|| is_nullable))
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+
+        let field_name_to_index = if struct_mode == StructMode::ObjectOnly {
+            build_field_index(fields)
+        } else {
+            None
         };
 
         Ok(Self {
             data_type,
             decoders,
-            strict_mode,
+            strict_mode: ctx.strict_mode(),
             is_nullable,
             struct_mode,
             field_name_to_index,

Reply via email to