This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new f7208b0  Add method `Schema::parse_str_with_list` to parse root 
schemas with references (#104)
f7208b0 is described below

commit f7208b0f5b8901e72b9c6d6b1e2a4a77ee66c2a3
Author: Robert Yokota <[email protected]>
AuthorDate: Sat Jan 18 02:14:50 2025 -0800

    Add method `Schema::parse_str_with_list` to parse root schemas with 
references (#104)
    
    * Add method `Schema::parse_str_with_list`
    
    Add a new method `Schema::parse_str_with_list` that can parse
    a root schema along with a list of named schemas.  This method
    can be used to parse a root union that has named schema references,
    for example.
    
    This new method combines the functionality of `Schema::parse_str`
    with `Schema::parse_list`.
    
    * Run cargo fmt
    
    * Issue #104 - Minor cleanup and add more tests
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    ---------
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
---
 avro/src/schema.rs | 163 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 152 insertions(+), 11 deletions(-)

diff --git a/avro/src/schema.rs b/avro/src/schema.rs
index 1289c82..1757e98 100644
--- a/avro/src/schema.rs
+++ b/avro/src/schema.rs
@@ -1064,7 +1064,7 @@ impl Schema {
         parser.parse_str(input)
     }
 
-    /// Create a array of `Schema`'s from a list of named JSON Avro schemas 
(Record, Enum, and
+    /// Create an array of `Schema`'s from a list of named JSON Avro schemas 
(Record, Enum, and
     /// Fixed).
     ///
     /// It is allowed that the schemas have cross-dependencies; these will be 
resolved
@@ -1096,6 +1096,45 @@ impl Schema {
         parser.parse_list()
     }
 
+    /// Create a `Schema` from a string representing a JSON Avro schema,
+    /// resolving its references against a list of named JSON Avro schemas
+    /// (Record, Enum, and Fixed).
+    ///
+    /// It is allowed that the schemas have cross-dependencies; these will be 
resolved
+    /// during parsing.
+    ///
+    /// If two of the named input schemas have the same fullname, an Error 
will be returned.
+    ///
+    /// # Arguments
+    /// * `schema` - the JSON string of the schema to parse
+    /// * `schemata` - a slice of JSON strings of named schemas, used to resolve references in `schema`
+    pub fn parse_str_with_list(schema: &str, schemata: &[&str]) -> 
AvroResult<Schema> {
+        let mut input_schemas: HashMap<Name, Value> = 
HashMap::with_capacity(schemata.len());
+        let mut input_order: Vec<Name> = Vec::with_capacity(schemata.len());
+        for json in schemata {
+            let schema: Value = 
serde_json::from_str(json).map_err(Error::ParseSchemaJson)?;
+            if let Value::Object(inner) = &schema {
+                let name = Name::parse(inner, &None)?;
+                if let Some(_previous) = input_schemas.insert(name.clone(), 
schema) {
+                    return Err(Error::NameCollision(name.fullname(None)));
+                }
+                input_order.push(name);
+            } else {
+                return Err(Error::GetNameField);
+            }
+        }
+        let mut parser = Parser {
+            input_schemas,
+            resolving_schemas: HashMap::default(),
+            input_order,
+            parsed_schemas: HashMap::with_capacity(schemata.len()),
+        };
+        parser.parse_input_schemas()?;
+
+        let value = 
serde_json::from_str(schema).map_err(Error::ParseSchemaJson)?;
+        parser.parse(&value, &None)
+    }
+
     /// Create a `Schema` from a reader which implements [`Read`].
     pub fn parse_reader(reader: &mut (impl Read + ?Sized)) -> 
AvroResult<Schema> {
         let mut buf = String::new();
@@ -1214,6 +1253,21 @@ impl Parser {
     /// Create an array of `Schema`'s from an iterator of JSON Avro schemas. 
It is allowed that
     /// the schemas have cross-dependencies; these will be resolved during 
parsing.
     fn parse_list(&mut self) -> Result<Vec<Schema>, Error> {
+        self.parse_input_schemas()?;
+
+        let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len());
+        for name in self.input_order.drain(0..) {
+            let parsed = self
+                .parsed_schemas
+                .remove(&name)
+                .expect("One of the input schemas was unexpectedly not 
parsed");
+            parsed_schemas.push(parsed);
+        }
+        Ok(parsed_schemas)
+    }
+
+    /// Convert the input schemas to parsed_schemas
+    fn parse_input_schemas(&mut self) -> Result<(), Error> {
         while !self.input_schemas.is_empty() {
             let next_name = self
                 .input_schemas
@@ -1229,16 +1283,7 @@ impl Parser {
             self.parsed_schemas
                 .insert(get_schema_type_name(name, value), parsed);
         }
-
-        let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len());
-        for name in self.input_order.drain(0..) {
-            let parsed = self
-                .parsed_schemas
-                .remove(&name)
-                .expect("One of the input schemas was unexpectedly not 
parsed");
-            parsed_schemas.push(parsed);
-        }
-        Ok(parsed_schemas)
+        Ok(())
     }
 
     /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro
@@ -2695,6 +2740,102 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn avro_rs_104_test_root_union_of_records() -> TestResult {
+        // A and B are the same except the name.
+        let schema_str_a = r#"{
+            "name": "A",
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        let schema_str_b = r#"{
+            "name": "B",
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        let schema_str_c = r#"["A", "B"]"#;
+
+        let schema_c = Schema::parse_str_with_list(schema_str_c, 
&[schema_str_a, schema_str_b])?;
+
+        let schema_c_expected = Schema::Union(UnionSchema::new(vec![
+            Schema::Ref {
+                name: Name::new("A")?,
+            },
+            Schema::Ref {
+                name: Name::new("B")?,
+            },
+        ])?);
+
+        assert_eq!(schema_c, schema_c_expected);
+        Ok(())
+    }
+
+    #[test]
+    fn avro_rs_104_test_root_union_of_records_name_collision() -> TestResult {
+        // A1 and A2 are exactly the same, including the name "A".
+        let schema_str_a1 = r#"{
+            "name": "A",
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        let schema_str_a2 = r#"{
+            "name": "A",
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        let schema_str_c = r#"["A", "A"]"#;
+
+        match Schema::parse_str_with_list(schema_str_c, &[schema_str_a1, 
schema_str_a2]) {
+            Ok(_) => unreachable!("Expected an error that the name is already 
defined"),
+            Err(e) => assert_eq!(
+                e.to_string(),
+                "Two schemas with the same fullname were given: \"A\""
+            ),
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_rs_104_test_root_union_of_records_no_name() -> TestResult {
+        let schema_str_a = r#"{
+            "name": "A",
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        // B has no name field.
+        let schema_str_b = r#"{
+            "type": "record",
+            "fields": [
+                {"name": "field_one", "type": "float"}
+            ]
+        }"#;
+
+        let schema_str_c = r#"["A", "A"]"#;
+
+        match Schema::parse_str_with_list(schema_str_c, &[schema_str_a, 
schema_str_b]) {
+            Ok(_) => unreachable!("Expected an error that schema_str_b is 
missing a name field"),
+            Err(e) => assert_eq!(e.to_string(), "No `name` field"),
+        }
+
+        Ok(())
+    }
+
     #[test]
     fn avro_3584_test_recursion_records() -> TestResult {
         // A and B are the same except the name.

Reply via email to