This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f502392 fix: Don't allow resolving a schema that is already known
(#444)
f502392 is described below
commit f502392498d3d7714789f581589b67917ad77da3
Author: Martin Grigorov <[email protected]>
AuthorDate: Tue Jan 27 22:15:55 2026 +0200
fix: Don't allow resolving a schema that is already known (#444)
The `known_schemata` NamesRef is used to resolve schema references, i.e.
if a schema ref is not referring to a schema that has been seen/defined
earlier in the same schema definition (JSON) then we consult a collection
of known (previously parsed) schemas.
If a newly parsed schema is defined earlier then an
AmbiguousSchemaDefinition error is returned.
The same should be done when a newly parsed schema clashes with a schema
with the very same name in `known_schemata`
---
avro/src/schema/resolve.rs | 114 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 86 insertions(+), 28 deletions(-)
diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs
index 61b2e98..9274e61 100644
--- a/avro/src/schema/resolve.rs
+++ b/avro/src/schema/resolve.rs
@@ -171,18 +171,24 @@ pub fn resolve_names<'s, 'n>(
})
| Schema::Duration(FixedSchema { name, .. }) => {
let fully_qualified_name =
name.fully_qualified_name(enclosing_namespace);
- if names.insert(fully_qualified_name.clone(), schema).is_some() {
+ if names.contains_key(&fully_qualified_name)
+ || known_schemata.contains_key(&fully_qualified_name)
+ {
Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into())
} else {
+ names.insert(fully_qualified_name, schema);
Ok(())
}
}
Schema::Record(RecordSchema { name, fields, .. }) => {
let fully_qualified_name =
name.fully_qualified_name(enclosing_namespace);
- if names.insert(fully_qualified_name.clone(), schema).is_some() {
+ if names.contains_key(&fully_qualified_name)
+ || known_schemata.contains_key(&fully_qualified_name)
+ {
Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into())
} else {
- let record_namespace = fully_qualified_name.namespace;
+ let record_namespace = fully_qualified_name.namespace.clone();
+ names.insert(fully_qualified_name, schema);
for field in fields {
resolve_names(&field.schema, names, &record_namespace,
known_schemata)?
}
@@ -217,10 +223,13 @@ pub fn resolve_names_with_schemata<'s, 'n>(
#[cfg(test)]
mod tests {
- use crate::Schema;
- use crate::schema::Name;
- use crate::schema::resolve::{ResolvedOwnedSchema, ResolvedSchema};
+ use super::{ResolvedOwnedSchema, ResolvedSchema};
+ use crate::{
+ Schema,
+ schema::{Name, NamesRef},
+ };
use apache_avro_test_helper::TestResult;
+ use std::collections::HashMap;
#[test]
fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() ->
TestResult {
@@ -254,7 +263,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_record_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -295,7 +304,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_record_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -331,7 +340,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_enum_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -367,7 +376,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_enum_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -403,7 +412,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_fixed_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -439,7 +448,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.inner_fixed_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -481,7 +490,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "inner_space.inner_record_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -518,7 +527,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "inner_space.inner_enum_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -555,7 +564,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "inner_space.inner_fixed_name"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -608,7 +617,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 3);
for s in &[
"space.record_name",
@@ -666,7 +675,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 3);
for s in &[
"space.record_name",
@@ -725,7 +734,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 3);
for s in &[
"space.record_name",
@@ -770,7 +779,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.in_array_record"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -811,7 +820,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
assert_eq!(rs.get_names().len(), 2);
for s in &["space.record_name", "space.in_map_record"] {
assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -848,7 +857,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
// confirm we have expected 2 full-names
assert_eq!(rs.get_names().len(), 2);
@@ -857,8 +866,8 @@ mod tests {
}
// convert Schema back to JSON string
- let schema_str = serde_json::to_string(&schema).expect("test failed");
- let _schema = Schema::parse_str(&schema_str).expect("test failed");
+ let schema_str = serde_json::to_string(&schema)?;
+ let _schema = Schema::parse_str(&schema_str)?;
assert_eq!(schema, _schema);
Ok(())
@@ -892,7 +901,7 @@ mod tests {
}
"#;
let schema = Schema::parse_str(schema)?;
- let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't
successfully parse");
+ let rs = ResolvedSchema::new(&schema)?;
// confirm we have expected 2 full-names
assert_eq!(rs.get_names().len(), 2);
@@ -901,8 +910,8 @@ mod tests {
}
// convert Schema back to JSON string
- let schema_str = serde_json::to_string(&schema).expect("test failed");
- let _schema = Schema::parse_str(&schema_str).expect("test failed");
+ let schema_str = serde_json::to_string(&schema)?;
+ let _schema = Schema::parse_str(&schema_str)?;
assert_eq!(schema, _schema);
Ok(())
@@ -931,7 +940,7 @@ mod tests {
]
}"#,
)?;
- let _resolved = ResolvedSchema::try_from(&schema)?;
+ let _resolved = ResolvedSchema::new(&schema)?;
let _resolved_owned = ResolvedOwnedSchema::try_from(schema)?;
Ok(())
@@ -962,9 +971,58 @@ mod tests {
]
}"#,
)?;
- let _resolved = ResolvedSchema::try_from(&schema)?;
+ let _resolved = ResolvedSchema::new(&schema)?;
let _resolved_owned = ResolvedOwnedSchema::try_from(schema)?;
Ok(())
}
+
+ #[test]
+ fn avro_rs_444_do_not_allow_duplicate_names_in_known_schemata() ->
TestResult {
+ let schema = Schema::parse_str(
+ r#"{
+ "name": "foo",
+ "type": "record",
+ "fields": [
+ {
+ "name": "a",
+ "type": {
+ "type": "fixed",
+ "size": 16,
+ "logicalType": "decimal",
+ "precision": 4,
+ "scale": 2,
+ "name": "bar"
+ }
+ },
+ {
+ "name": "b",
+ "type": "bar"
+ },
+ {
+ "name": "c",
+ "type": {
+ "type": "fixed",
+ "size": 16,
+ "logicalType": "uuid",
+ "name": "duplicated_name"
+ }
+ }
+ ]
+ }"#,
+ )?;
+
+ let mut known_schemata: NamesRef = HashMap::default();
+ known_schemata.insert("duplicated_name".try_into()?, &Schema::Boolean);
+
+ let result = ResolvedSchema::new_with_known_schemata(vec![&schema],
&None, &known_schemata)
+ .unwrap_err();
+
+ assert_eq!(
+ result.to_string(),
+ "Two named schema defined for same fullname: duplicated_name."
+ );
+
+ Ok(())
+ }
}