This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f7208b0 Add method `Schema::parse_str_with_list` to parse root
schemas with references (#104)
f7208b0 is described below
commit f7208b0f5b8901e72b9c6d6b1e2a4a77ee66c2a3
Author: Robert Yokota <[email protected]>
AuthorDate: Sat Jan 18 02:14:50 2025 -0800
Add method `Schema::parse_str_with_list` to parse root schemas with
references (#104)
* Add method `Schema::parse_str_with_list`
Add a new method `Schema::parse_str_with_list` that can parse
a root schema along with a list of named schemas. This method
can be used to parse a root union that has named schema references,
for example.
This new method combines the functionality of `Schema::parse_str`
with `Schema::parse_list`.
* Run cargo fmt
* Issue #104 - Minor cleanup and add more tests
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---------
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
---
avro/src/schema.rs | 163 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 152 insertions(+), 11 deletions(-)
diff --git a/avro/src/schema.rs b/avro/src/schema.rs
index 1289c82..1757e98 100644
--- a/avro/src/schema.rs
+++ b/avro/src/schema.rs
@@ -1064,7 +1064,7 @@ impl Schema {
parser.parse_str(input)
}
- /// Create a array of `Schema`'s from a list of named JSON Avro schemas
(Record, Enum, and
+ /// Create an array of `Schema`'s from a list of named JSON Avro schemas
(Record, Enum, and
/// Fixed).
///
/// It is allowed that the schemas have cross-dependencies; these will be
resolved
@@ -1096,6 +1096,45 @@ impl Schema {
parser.parse_list()
}
+ /// Create a `Schema` from a string representing a JSON Avro schema,
+ /// resolving its references against a list of named JSON Avro
schemas (Record, Enum, and
+ /// Fixed).
+ ///
+ /// It is allowed that the schemas have cross-dependencies; these will be
resolved
+ /// during parsing.
+ ///
+ /// If two of the named input schemas have the same fullname, an Error
will be returned.
+ ///
+ /// # Arguments
+ /// * `schema` - the JSON string of the schema to parse
+ /// * `schemata` - a slice of additional schemas that is used to resolve
cross-references
+ pub fn parse_str_with_list(schema: &str, schemata: &[&str]) ->
AvroResult<Schema> {
+ let mut input_schemas: HashMap<Name, Value> =
HashMap::with_capacity(schemata.len());
+ let mut input_order: Vec<Name> = Vec::with_capacity(schemata.len());
+ for json in schemata {
+ let schema: Value =
serde_json::from_str(json).map_err(Error::ParseSchemaJson)?;
+ if let Value::Object(inner) = &schema {
+ let name = Name::parse(inner, &None)?;
+ if let Some(_previous) = input_schemas.insert(name.clone(),
schema) {
+ return Err(Error::NameCollision(name.fullname(None)));
+ }
+ input_order.push(name);
+ } else {
+ return Err(Error::GetNameField);
+ }
+ }
+ let mut parser = Parser {
+ input_schemas,
+ resolving_schemas: HashMap::default(),
+ input_order,
+ parsed_schemas: HashMap::with_capacity(schemata.len()),
+ };
+ parser.parse_input_schemas()?;
+
+ let value =
serde_json::from_str(schema).map_err(Error::ParseSchemaJson)?;
+ parser.parse(&value, &None)
+ }
+
/// Create a `Schema` from a reader which implements [`Read`].
pub fn parse_reader(reader: &mut (impl Read + ?Sized)) ->
AvroResult<Schema> {
let mut buf = String::new();
@@ -1214,6 +1253,21 @@ impl Parser {
/// Create an array of `Schema`'s from an iterator of JSON Avro schemas.
It is allowed that
/// the schemas have cross-dependencies; these will be resolved during
parsing.
fn parse_list(&mut self) -> Result<Vec<Schema>, Error> {
+ self.parse_input_schemas()?;
+
+ let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len());
+ for name in self.input_order.drain(0..) {
+ let parsed = self
+ .parsed_schemas
+ .remove(&name)
+ .expect("One of the input schemas was unexpectedly not
parsed");
+ parsed_schemas.push(parsed);
+ }
+ Ok(parsed_schemas)
+ }
+
+ /// Convert the input schemas to parsed_schemas
+ fn parse_input_schemas(&mut self) -> Result<(), Error> {
while !self.input_schemas.is_empty() {
let next_name = self
.input_schemas
@@ -1229,16 +1283,7 @@ impl Parser {
self.parsed_schemas
.insert(get_schema_type_name(name, value), parsed);
}
-
- let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len());
- for name in self.input_order.drain(0..) {
- let parsed = self
- .parsed_schemas
- .remove(&name)
- .expect("One of the input schemas was unexpectedly not
parsed");
- parsed_schemas.push(parsed);
- }
- Ok(parsed_schemas)
+ Ok(())
}
/// Create a `Schema` from a `serde_json::Value` representing a JSON Avro
@@ -2695,6 +2740,102 @@ mod tests {
Ok(())
}
+ #[test]
+ fn avro_rs_104_test_root_union_of_records() -> TestResult {
+ // A and B are the same except the name.
+ let schema_str_a = r#"{
+ "name": "A",
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ let schema_str_b = r#"{
+ "name": "B",
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ let schema_str_c = r#"["A", "B"]"#;
+
+ let schema_c = Schema::parse_str_with_list(schema_str_c,
&[schema_str_a, schema_str_b])?;
+
+ let schema_c_expected = Schema::Union(UnionSchema::new(vec![
+ Schema::Ref {
+ name: Name::new("A")?,
+ },
+ Schema::Ref {
+ name: Name::new("B")?,
+ },
+ ])?);
+
+ assert_eq!(schema_c, schema_c_expected);
+ Ok(())
+ }
+
+ #[test]
+ fn avro_rs_104_test_root_union_of_records_name_collision() -> TestResult {
+ // Both input schemas are named A and are otherwise identical.
+ let schema_str_a1 = r#"{
+ "name": "A",
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ let schema_str_a2 = r#"{
+ "name": "A",
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ let schema_str_c = r#"["A", "A"]"#;
+
+ match Schema::parse_str_with_list(schema_str_c, &[schema_str_a1,
schema_str_a2]) {
+ Ok(_) => unreachable!("Expected an error that the name is already
defined"),
+ Err(e) => assert_eq!(
+ e.to_string(),
+ "Two schemas with the same fullname were given: \"A\""
+ ),
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn avro_rs_104_test_root_union_of_records_no_name() -> TestResult {
+ let schema_str_a = r#"{
+ "name": "A",
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ // B has no name field.
+ let schema_str_b = r#"{
+ "type": "record",
+ "fields": [
+ {"name": "field_one", "type": "float"}
+ ]
+ }"#;
+
+ let schema_str_c = r#"["A", "A"]"#;
+
+ match Schema::parse_str_with_list(schema_str_c, &[schema_str_a,
schema_str_b]) {
+ Ok(_) => unreachable!("Expected an error that schema_str_b is
missing a name field"),
+ Err(e) => assert_eq!(e.to_string(), "No `name` field"),
+ }
+
+ Ok(())
+ }
+
#[test]
fn avro_3584_test_recursion_records() -> TestResult {
// A and B are the same except the name.