This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch fix/infinite_loop_icf in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 5b00e32ee17cec0be7daa024270f93ee1bf3d247 Author: default <[email protected]> AuthorDate: Wed Jan 21 12:53:58 2026 +0000 fix: Support recursive types for `Schema::independent_canonical_form` --- avro/src/schema/mod.rs | 76 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 7f453f4..356d568 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -614,7 +614,7 @@ impl Schema { /// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas pub fn independent_canonical_form(&self, schemata: &[Schema]) -> Result<String, Error> { let mut this = self.clone(); - this.denormalize(schemata)?; + this.denormalize(schemata, &mut HashSet::with_capacity(schemata.len()))?; Ok(this.canonical_form()) } @@ -870,7 +870,19 @@ impl Schema { UnionSchema::new(schemas).map(Schema::Union) } - fn denormalize(&mut self, schemata: &[Schema]) -> AvroResult<()> { + fn denormalize( + &mut self, + schemata: &[Schema], + defined_names: &mut HashSet<Name>, + ) -> AvroResult<()> { + // If this name already exists in this schema we can reference it. + // This makes the denormalized form as small as possible and prevents infinite loops for recursive types. 
+ if let Some(name) = self.name() + && defined_names.contains(name) + { + *self = Schema::Ref { name: name.clone() }; + return Ok(()); + } match self { Schema::Ref { name } => { let replacement_schema = schemata @@ -878,28 +890,39 @@ impl Schema { .find(|s| s.name().map(|n| *n == *name).unwrap_or(false)); if let Some(schema) = replacement_schema { let mut denorm = schema.clone(); - denorm.denormalize(schemata)?; + denorm.denormalize(schemata, defined_names)?; *self = denorm; } else { return Err(Details::SchemaResolutionError(name.clone()).into()); } } Schema::Record(record_schema) => { + defined_names.insert(record_schema.name.clone()); for field in &mut record_schema.fields { - field.schema.denormalize(schemata)?; + field.schema.denormalize(schemata, defined_names)?; } } Schema::Array(array_schema) => { - array_schema.items.denormalize(schemata)?; + array_schema.items.denormalize(schemata, defined_names)?; } Schema::Map(map_schema) => { - map_schema.types.denormalize(schemata)?; + map_schema.types.denormalize(schemata, defined_names)?; } Schema::Union(union_schema) => { for schema in &mut union_schema.schemas { - schema.denormalize(schemata)?; + schema.denormalize(schemata, defined_names)?; } } + Schema::Enum(EnumSchema { name, .. }) + | Schema::Fixed(FixedSchema { name, .. }) + | Schema::Decimal(DecimalSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { name, .. }), + .. + }) + | Schema::Uuid(UuidSchema::Fixed(FixedSchema { name, .. })) + | Schema::Duration(FixedSchema { name, .. }) => { + defined_names.insert(name.clone()); + } _ => (), } Ok(()) @@ -6453,4 +6476,43 @@ mod tests { Ok(()) } + + #[test] + fn avro_rs_xxx_independent_canonical_form() -> TestResult { + let record = Schema::Record( + RecordSchema::builder() + .name(Name::new("root")?) 
+ .fields(vec![ + RecordField::builder() + .name("node".into()) + .schema(Schema::Ref { + name: Name::new("node")?, + }) + .build(), + ]) + .build(), + ); + let node = Schema::Record( + RecordSchema::builder() + .name(Name::new("node")?) + .fields(vec![ + RecordField::builder() + .name("children".into()) + .schema(Schema::union(vec![ + Schema::Null, + Schema::Ref { + name: Name::new("node")?, + }, + ])?) + .build(), + ]) + .build(), + ); + let icf = record.independent_canonical_form(&[node])?; + assert_eq!( + icf, + r#"{"name":"root","type":"record","fields":[{"name":"node","type":{"name":"node","type":"record","fields":[{"name":"children","type":["null","node"]}]}}]}"# + ); + Ok(()) + } }
