This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch 402-extract-union-schema-to-its-own-module
in repository https://gitbox.apache.org/repos/asf/avro-rs.git

commit 4bb88a8d8d60ce2c848da2b256e9d18d6bbb18cd
Author: Martin Tzvetanov Grigorov <[email protected]>
AuthorDate: Mon Jan 19 13:53:29 2026 +0200

    chore: Extract UnionSchema to its own module (src/schema/union.rs)
    
    No functional changes!
---
 avro/src/schema/mod.rs   | 107 ++--------------------------------------
 avro/src/schema/union.rs | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 103 deletions(-)

diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs
index bc7832b..90a48b8 100644
--- a/avro/src/schema/mod.rs
+++ b/avro/src/schema/mod.rs
@@ -41,10 +41,12 @@ use std::{
 use strum_macros::{Display, EnumDiscriminants};
 
 mod record;
-use crate::schema::record::RecordSchemaParseLocation;
-pub use crate::schema::record::{
+use record::RecordSchemaParseLocation;
+pub use record::{
     RecordField, RecordFieldBuilder, RecordFieldOrder, RecordSchema, RecordSchemaBuilder,
 };
+mod union;
+pub use union::UnionSchema;
 
 /// Represents an Avro schema fingerprint
 /// More information about Avro schema fingerprints can be found in the
@@ -774,107 +776,6 @@ pub enum UuidSchema {
     Fixed(FixedSchema),
 }
 
-/// A description of a Union schema
-#[derive(Debug, Clone)]
-pub struct UnionSchema {
-    /// The schemas that make up this union
-    pub(crate) schemas: Vec<Schema>,
-    // Used to ensure uniqueness of schema inputs, and provide constant time finding of the
-    // schema index given a value.
-    // **NOTE** that this approach does not work for named types, and will have to be modified
-    // to support that. A simple solution is to also keep a mapping of the names used.
-    variant_index: BTreeMap<SchemaKind, usize>,
-}
-
-impl UnionSchema {
-    /// Creates a new UnionSchema from a vector of schemas.
-    ///
-    /// # Errors
-    /// Will return an error if `schemas` has duplicate unnamed schemas or if `schemas`
-    /// contains a union.
-    pub fn new(schemas: Vec<Schema>) -> AvroResult<Self> {
-        let mut vindex = BTreeMap::new();
-        for (i, schema) in schemas.iter().enumerate() {
-            if let Schema::Union(_) = schema {
-                return Err(Details::GetNestedUnion.into());
-            }
-            if !schema.is_named() && vindex.insert(SchemaKind::from(schema), i).is_some() {
-                return Err(Details::GetUnionDuplicate.into());
-            }
-        }
-        Ok(UnionSchema {
-            schemas,
-            variant_index: vindex,
-        })
-    }
-
-    /// Returns a slice to all variants of this schema.
-    pub fn variants(&self) -> &[Schema] {
-        &self.schemas
-    }
-
-    /// Returns true if the any of the variants of this `UnionSchema` is `Null`.
-    pub fn is_nullable(&self) -> bool {
-        self.schemas.iter().any(|x| matches!(x, Schema::Null))
-    }
-
-    /// Optionally returns a reference to the schema matched by this value, as well as its position
-    /// within this union.
-    ///
-    /// Extra arguments:
-    /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas would be used to resolve references.
-    pub fn find_schema_with_known_schemata<S: Borrow<Schema> + Debug>(
-        &self,
-        value: &types::Value,
-        known_schemata: Option<&HashMap<Name, S>>,
-        enclosing_namespace: &Namespace,
-    ) -> Option<(usize, &Schema)> {
-        let schema_kind = SchemaKind::from(value);
-        if let Some(&i) = self.variant_index.get(&schema_kind) {
-            // fast path
-            Some((i, &self.schemas[i]))
-        } else {
-            // slow path (required for matching logical or named types)
-
-            // first collect what schemas we already know
-            let mut collected_names: HashMap<Name, &Schema> = known_schemata
-                .map(|names| {
-                    names
-                        .iter()
-                        .map(|(name, schema)| (name.clone(), schema.borrow()))
-                        .collect()
-                })
-                .unwrap_or_default();
-
-            self.schemas.iter().enumerate().find(|(_, schema)| {
-                let resolved_schema = ResolvedSchema::new_with_known_schemata(
-                    vec![*schema],
-                    enclosing_namespace,
-                    &collected_names,
-                )
-                .expect("Schema didn't successfully parse");
-                let resolved_names = resolved_schema.names_ref;
-
-                // extend known schemas with just resolved names
-                collected_names.extend(resolved_names);
-                let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
-
-                value
-                    .clone()
-                    .resolve_internal(schema, &collected_names, namespace, &None)
-                    .is_ok()
-            })
-        }
-    }
-}
-
-// No need to compare variant_index, it is derivative of schemas.
-impl PartialEq for UnionSchema {
-    fn eq(&self, other: &UnionSchema) -> bool {
-        self.schemas.eq(&other.schemas)
-    }
-}
-
 type DecimalMetadata = usize;
 pub(crate) type Precision = DecimalMetadata;
 pub(crate) type Scale = DecimalMetadata;
diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs
new file mode 100644
index 0000000..a707279
--- /dev/null
+++ b/avro/src/schema/union.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::AvroResult;
+use crate::error::Details;
+use crate::schema::{Name, Namespace, ResolvedSchema, Schema, SchemaKind};
+use crate::types;
+use std::borrow::Borrow;
+use std::collections::{BTreeMap, HashMap};
+use std::fmt::Debug;
+
+/// A description of a Union schema
+#[derive(Debug, Clone)]
+pub struct UnionSchema {
+    /// The schemas that make up this union
+    pub(crate) schemas: Vec<Schema>,
+    // Used to ensure uniqueness of schema inputs, and provide constant time finding of the
+    // schema index given a value.
+    // **NOTE** that this approach does not work for named types, and will have to be modified
+    // to support that. A simple solution is to also keep a mapping of the names used.
+    variant_index: BTreeMap<SchemaKind, usize>,
+}
+
+impl UnionSchema {
+    /// Creates a new UnionSchema from a vector of schemas.
+    ///
+    /// # Errors
+    /// Will return an error if `schemas` has duplicate unnamed schemas or if `schemas`
+    /// contains a union.
+    pub fn new(schemas: Vec<Schema>) -> AvroResult<Self> {
+        let mut vindex = BTreeMap::new();
+        for (i, schema) in schemas.iter().enumerate() {
+            if let Schema::Union(_) = schema {
+                return Err(Details::GetNestedUnion.into());
+            }
+            if !schema.is_named() && vindex.insert(SchemaKind::from(schema), i).is_some() {
+                return Err(Details::GetUnionDuplicate.into());
+            }
+        }
+        Ok(UnionSchema {
+            schemas,
+            variant_index: vindex,
+        })
+    }
+
+    /// Returns a slice to all variants of this schema.
+    pub fn variants(&self) -> &[Schema] {
+        &self.schemas
+    }
+
+    /// Returns true if the any of the variants of this `UnionSchema` is `Null`.
+    pub fn is_nullable(&self) -> bool {
+        self.schemas.iter().any(|x| matches!(x, Schema::Null))
+    }
+
+    /// Optionally returns a reference to the schema matched by this value, as well as its position
+    /// within this union.
+    ///
+    /// Extra arguments:
+    /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas would be used to resolve references.
+    pub fn find_schema_with_known_schemata<S: Borrow<Schema> + Debug>(
+        &self,
+        value: &types::Value,
+        known_schemata: Option<&HashMap<Name, S>>,
+        enclosing_namespace: &Namespace,
+    ) -> Option<(usize, &Schema)> {
+        let schema_kind = SchemaKind::from(value);
+        if let Some(&i) = self.variant_index.get(&schema_kind) {
+            // fast path
+            Some((i, &self.schemas[i]))
+        } else {
+            // slow path (required for matching logical or named types)
+
+            // first collect what schemas we already know
+            let mut collected_names: HashMap<Name, &Schema> = known_schemata
+                .map(|names| {
+                    names
+                        .iter()
+                        .map(|(name, schema)| (name.clone(), schema.borrow()))
+                        .collect()
+                })
+                .unwrap_or_default();
+
+            self.schemas.iter().enumerate().find(|(_, schema)| {
+                let resolved_schema = ResolvedSchema::new_with_known_schemata(
+                    vec![*schema],
+                    enclosing_namespace,
+                    &collected_names,
+                )
+                .expect("Schema didn't successfully parse");
+                let resolved_names = resolved_schema.names_ref;
+
+                // extend known schemas with just resolved names
+                collected_names.extend(resolved_names);
+                let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone());
+
+                value
+                    .clone()
+                    .resolve_internal(schema, &collected_names, namespace, &None)
+                    .is_ok()
+            })
+        }
+    }
+}
+
+// No need to compare variant_index, it is derivative of schemas.
+impl PartialEq for UnionSchema {
+    fn eq(&self, other: &UnionSchema) -> bool {
+        self.schemas.eq(&other.schemas)
+    }
+}

Reply via email to