This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/set_instead_of_map in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 672414425c40ed75eddc93251e367f3857eac6ba Author: Kriskras99 <[email protected]> AuthorDate: Thu Feb 19 10:41:20 2026 +0000 feat: Use `HashSet` instead of `HashMap` for `AvroSchemaComponent` --- avro/src/serde/derive.rs | 156 +++++++++++++++++++++--------------- avro/src/serde/with.rs | 53 ++++++------ avro/tests/avro_schema_component.rs | 6 +- avro/tests/get_record_fields.rs | 8 +- avro_derive/src/lib.rs | 37 ++++----- avro_derive/tests/derive.rs | 20 +++-- 6 files changed, 153 insertions(+), 127 deletions(-) diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index c631db9..066fe00 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -17,10 +17,10 @@ use crate::Schema; use crate::schema::{ - FixedSchema, Name, Names, Namespace, RecordField, RecordSchema, UnionSchema, UuidSchema, + FixedSchema, Name, Namespace, RecordField, RecordSchema, UnionSchema, UuidSchema, }; use std::borrow::Cow; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; /// Trait for types that serve as an Avro data model. /// @@ -175,15 +175,15 @@ use std::collections::HashMap; /// /// 1. In combination with `#[serde(with = "path::to::module)]` /// -/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut Names, &Namespace) -> Schema` -/// and `fn get_record_fields_in_ctxt(usize, &mut Names, &Namespace) -> Option<Vec<RecordField>>` in the module provided +/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, &Namespace) -> Schema` +/// and `fn get_record_fields_in_ctxt(usize, &mut HashSet<Name>, &Namespace) -> Option<Vec<RecordField>>` in the module provided /// to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those /// functions. /// /// 2. By providing a function directly, `#[avro(with = some_fn)]`. /// /// To get the schema, it will call the function provided. It must have the signature -/// `fn(&mut Names, &Namespace) -> Schema`. When this is used for a `transparent` struct, the +/// `fn(&mut HashSet<Name>, &Namespace) -> Schema`. When this is used for a `transparent` struct, the /// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used. /// This is only recommended for primitive types, as the default implementation cannot be efficiently /// implemented for complex types. @@ -208,15 +208,16 @@ pub trait AvroSchema { /// /// For example, you have a custom integer type: /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names, Namespace, RecordField}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField}}; +/// # use std::collections::HashSet; /// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods /// pub struct U24([u8; 3]); /// impl AvroSchemaComponent for U24 { -/// fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { /// Schema::Int /// } /// -/// fn get_record_fields_in_ctxt(_: usize, _: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { /// None // A Schema::Int is not a Schema::Record so there are no fields to return /// } ///} @@ -227,17 +228,18 @@ pub trait AvroSchema { /// To construct a schema for a type is "transparent", such as for smart pointers, simply /// pass through the arguments to the inner type: /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names, Namespace, RecordField}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField}}; /// # use serde::{Serialize, Deserialize}; +/// # use std::collections::HashSet; /// #[derive(Serialize, Deserialize)] /// #[serde(transparent)] // This attribute is important for all passthrough implementations! /// pub struct Transparent<T>(T); /// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> { -/// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { /// T::get_schema_in_ctxt(named_schemas, enclosing_namespace) /// } /// -/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { /// T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace) /// } ///} @@ -256,9 +258,9 @@ pub trait AvroSchema { /// - Even if your schema is not a record, still implement the function and just return `None` /// /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Names, Namespace, RecordField, RecordSchema}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField, RecordSchema}}; /// # use serde::{Serialize, Deserialize}; -/// # use std::time::Duration; +/// # use std::{time::Duration, collections::HashSet}; /// pub struct Foo { /// one: String, /// two: i32, @@ -266,26 +268,25 @@ pub trait AvroSchema { /// } /// /// impl AvroSchemaComponent for Foo { -/// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { /// // Create the fully qualified name for your type given the enclosing namespace /// let name = Name::new("Foo").unwrap().fully_qualified_name(enclosing_namespace); -/// if named_schemas.contains_key(&name) { +/// if named_schemas.contains(&name) { /// Schema::Ref { name } /// } else { /// let enclosing_namespace = &name.namespace; -/// // This is needed because otherwise recursive types will recurse forever and cause a stack overflow -/// named_schemas.insert(name.clone(), Schema::Ref { name: name.clone() }); +/// // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion. +/// named_schemas.insert(name.clone()); /// let schema = Schema::Record(RecordSchema::builder() /// .name(name.clone()) /// .fields(Self::get_record_fields_in_ctxt(0, named_schemas, enclosing_namespace).expect("Impossible!")) /// .build() /// ); -/// named_schemas.insert(name, schema.clone()); /// schema /// } /// } /// -/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { /// Some(vec![ /// RecordField::builder() /// .name("one") @@ -308,7 +309,10 @@ pub trait AvroSchema { /// ``` pub trait AvroSchemaComponent { /// Get the schema for this component - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema; + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema; /// Get the fields of this schema if it is a record. /// @@ -318,7 +322,7 @@ pub trait AvroSchemaComponent { /// implement this function when manually implementing this trait. fn get_record_fields_in_ctxt( first_field_position: usize, - named_schemas: &mut Names, + named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace, ) -> Option<Vec<RecordField>> { get_record_fields_in_ctxt( @@ -335,22 +339,23 @@ pub trait AvroSchemaComponent { /// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]` pub fn get_record_fields_in_ctxt( first_field_position: usize, - named_schemas: &mut Names, + named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace, - schema_fn: fn(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema, + schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema, ) -> Option<Vec<RecordField>> { let mut record = match schema_fn(named_schemas, enclosing_namespace) { Schema::Record(record) => record, Schema::Ref { name } => { // This schema already exists in `named_schemas` so temporarily remove it so we can // get the actual schema. - let temp = named_schemas - .remove(&name) - .unwrap_or_else(|| panic!("Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}")); + assert!( + named_schemas.remove(&name), + "Name '{name}' should exist in `named_schemas` otherwise Ref is invalid: {named_schemas:?}" + ); // Get the schema let schema = schema_fn(named_schemas, enclosing_namespace); // Reinsert the old value - named_schemas.insert(name, temp); + named_schemas.insert(name); // Now check if we actually got a record and return the fields if that is the case let Schema::Record(record) = schema else { @@ -446,7 +451,7 @@ pub fn get_record_fields_in_ctxt( }; // The schema is used, so reinsert it - named_schemas.insert(name.clone(), Schema::Ref { name }); + named_schemas.insert(name.clone()); break; } @@ -468,18 +473,18 @@ where T: AvroSchemaComponent + ?Sized, { fn get_schema() -> Schema { - T::get_schema_in_ctxt(&mut HashMap::default(), &None) + T::get_schema_in_ctxt(&mut HashSet::default(), &None) } } macro_rules! impl_schema ( ($type:ty, $variant_constructor:expr) => ( impl AvroSchemaComponent for $type { - fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { + fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { $variant_constructor } - fn get_record_fields_in_ctxt(_: usize, _: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { None } } @@ -503,11 +508,11 @@ impl_schema!(char, Schema::String); macro_rules! impl_passthrough_schema ( ($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => ( impl<T: AvroSchemaComponent $(+ $bound)* + ?Sized> AvroSchemaComponent for $type { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { T::get_schema_in_ctxt(named_schemas, enclosing_namespace) } - fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace) } } @@ -523,11 +528,11 @@ impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Siz macro_rules! impl_array_schema ( ($type:ty where T: AvroSchemaComponent) => ( impl<T: AvroSchemaComponent> AvroSchemaComponent for $type { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) } - fn get_record_fields_in_ctxt(_: usize, _: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: usize, _: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { None } } @@ -543,13 +548,16 @@ impl<const N: usize, T> AvroSchemaComponent for [T; N] where T: AvroSchemaComponent, { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -560,13 +568,16 @@ impl<T> AvroSchemaComponent for HashMap<String, T> where T: AvroSchemaComponent, { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -577,7 +588,10 @@ impl<T> AvroSchemaComponent for Option<T> where T: AvroSchemaComponent, { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let variants = vec![ Schema::Null, T::get_schema_in_ctxt(named_schemas, enclosing_namespace), @@ -590,7 +604,7 @@ where fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -601,12 +615,14 @@ impl AvroSchemaComponent for core::time::Duration { /// The schema is [`Schema::Duration`] with the name `duration`. /// /// This is a lossy conversion as this Avro type does not store the amount of nanoseconds. - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let name = Name::new("duration") .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Duration(FixedSchema { @@ -616,14 +632,14 @@ impl AvroSchemaComponent for core::time::Duration { size: 12, attributes: Default::default(), }); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -634,12 +650,14 @@ impl AvroSchemaComponent for uuid::Uuid { /// The schema is [`Schema::Uuid`] with the name `uuid`. /// /// The underlying schema is [`Schema::Fixed`] with a size of 16. - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let name = Name::new("uuid") .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Uuid(UuidSchema::Fixed(FixedSchema { @@ -649,14 +667,14 @@ impl AvroSchemaComponent for uuid::Uuid { size: 16, attributes: Default::default(), })); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -665,12 +683,14 @@ impl AvroSchemaComponent for uuid::Uuid { impl AvroSchemaComponent for u64 { /// The schema is [`Schema::Fixed`] of size 8 with the name `u64`. - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let name = Name::new("u64") .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Fixed(FixedSchema { @@ -680,14 +700,14 @@ impl AvroSchemaComponent for u64 { size: 8, attributes: Default::default(), }); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -696,12 +716,14 @@ impl AvroSchemaComponent for u64 { impl AvroSchemaComponent for u128 { /// The schema is [`Schema::Fixed`] of size 16 with the name `u128`. - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let name = Name::new("u128") .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Fixed(FixedSchema { @@ -711,14 +733,14 @@ impl AvroSchemaComponent for u128 { size: 16, attributes: Default::default(), }); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -727,12 +749,14 @@ impl AvroSchemaComponent for u128 { impl AvroSchemaComponent for i128 { /// The schema is [`Schema::Fixed`] of size 16 with the name `i128`. - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt( + named_schemas: &mut HashSet<Name>, + enclosing_namespace: &Namespace, + ) -> Schema { let name = Name::new("i128") .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Fixed(FixedSchema { @@ -742,14 +766,14 @@ impl AvroSchemaComponent for i128 { size: 16, attributes: Default::default(), }); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None diff --git a/avro/src/serde/with.rs b/avro/src/serde/with.rs index bbcd7de..d5da690 100644 --- a/avro/src/serde/with.rs +++ b/avro/src/serde/with.rs @@ -92,22 +92,24 @@ impl Drop for BorrowedGuard { /// /// [`apache_avro::serde::bytes_opt`]: bytes_opt pub mod bytes { + use std::collections::HashSet; + use serde::{Deserializer, Serializer}; use crate::{ Schema, - schema::{Names, Namespace, RecordField}, + schema::{Name, Namespace, RecordField}, }; /// Returns [`Schema::Bytes`] - pub fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { Schema::Bytes } /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -154,15 +156,15 @@ pub mod bytes { /// [`apache_avro::serde::bytes`]: bytes pub mod bytes_opt { use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; + use std::{borrow::Borrow, collections::HashSet}; use crate::{ Schema, - schema::{Names, Namespace, RecordField, UnionSchema}, + schema::{Name, Namespace, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` - pub fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { Schema::Union( UnionSchema::new(vec![Schema::Null, Schema::Bytes]).expect("This is a valid union"), ) @@ -171,7 +173,7 @@ pub mod bytes_opt { /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -218,28 +220,29 @@ pub mod bytes_opt { /// /// [`apache_avro::serde::fixed_opt`]: fixed_opt pub mod fixed { + use std::collections::HashSet; + use super::BytesType; use serde::{Deserializer, Serializer}; use crate::{ Schema, - schema::{FixedSchema, Name, Names, Namespace, RecordField}, + schema::{FixedSchema, Name, Namespace, RecordField}, }; /// Returns `Schema::Fixed(N)` named `serde_avro_fixed_{N}` - #[expect(clippy::map_entry, reason = "We don't use the value from the map")] pub fn get_schema_in_ctxt<const N: usize>( - named_schemas: &mut Names, + named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace, ) -> Schema { let name = Name::new(&format!("serde_avro_fixed_{N}")) .expect("Name is valid") .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { Schema::Ref { name } } else { let schema = Schema::Fixed(FixedSchema::builder().name(name.clone()).size(N).build()); - named_schemas.insert(name, schema.clone()); + named_schemas.insert(name); schema } } @@ -247,7 +250,7 @@ pub mod fixed { /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -296,16 +299,16 @@ pub mod fixed { pub mod fixed_opt { use super::BytesType; use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; + use std::{borrow::Borrow, collections::HashSet}; use crate::{ Schema, - schema::{Names, Namespace, RecordField, UnionSchema}, + schema::{Name, Namespace, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Fixed(N))` where the fixed schema is named `serde_avro_fixed_{N}` pub fn get_schema_in_ctxt<const N: usize>( - named_schemas: &mut Names, + named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace, ) -> Schema { Schema::Union( @@ -320,7 +323,7 @@ pub mod fixed_opt { /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -370,22 +373,24 @@ pub mod fixed_opt { /// [`Value::Fixed`]: crate::types::Value::Fixed /// [`apache_avro::serde::slice_opt`]: slice_opt pub mod slice { + use std::collections::HashSet; + use serde::{Deserializer, Serializer}; use crate::{ Schema, - schema::{Names, Namespace, RecordField}, + schema::{Name, Namespace, RecordField}, }; /// Returns [`Schema::Bytes`] - pub fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { Schema::Bytes } /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None @@ -435,15 +440,15 @@ pub mod slice { /// [`apache_avro::serde::slice`]: mod@slice pub mod slice_opt { use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; + use std::{borrow::Borrow, collections::HashSet}; use crate::{ Schema, - schema::{Names, Namespace, RecordField, UnionSchema}, + schema::{Name, Namespace, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` - pub fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { Schema::Union( UnionSchema::new(vec![Schema::Null, Schema::Bytes]).expect("This is a valid union"), ) @@ -452,7 +457,7 @@ pub mod slice_opt { /// Returns `None` pub fn get_record_fields_in_ctxt( _: usize, - _: &mut Names, + _: &mut HashSet<Name>, _: &Namespace, ) -> Option<Vec<RecordField>> { None diff --git a/avro/tests/avro_schema_component.rs b/avro/tests/avro_schema_component.rs index 2efacb1..596f35f 100644 --- a/avro/tests/avro_schema_component.rs +++ b/avro/tests/avro_schema_component.rs @@ -16,11 +16,11 @@ // under the License. use apache_avro::{AvroSchemaComponent, Schema}; -use std::collections::HashMap; +use std::collections::HashSet; #[test] fn avro_rs_394_avro_schema_component_without_derive_feature() { - let schema = i32::get_schema_in_ctxt(&mut HashMap::default(), &None); + let schema = i32::get_schema_in_ctxt(&mut HashSet::default(), &None); assert!(matches!(schema, Schema::Int)); } @@ -29,5 +29,5 @@ fn avro_rs_394_avro_schema_component_without_derive_feature() { fn avro_rs_394_avro_schema_component_nested_options() { type VeryOptional = Option<Option<i32>>; - let _schema = VeryOptional::get_schema_in_ctxt(&mut HashMap::default(), &None); + let _schema = VeryOptional::get_schema_in_ctxt(&mut HashSet::default(), &None); } diff --git a/avro/tests/get_record_fields.rs b/avro/tests/get_record_fields.rs index 9b729fe..3417f26 100644 --- a/avro/tests/get_record_fields.rs +++ b/avro/tests/get_record_fields.rs @@ -19,7 +19,7 @@ use apache_avro::{ Schema, serde::{AvroSchemaComponent, get_record_fields_in_ctxt}, }; -use std::collections::HashMap; +use std::collections::HashSet; use apache_avro_test_helper::TestResult; @@ -31,7 +31,7 @@ fn avro_rs_448_default_get_record_fields_no_recursion() -> TestResult { _b: String, } - let mut named_schemas = HashMap::new(); + let mut named_schemas = HashSet::new(); let fields = get_record_fields_in_ctxt(0, &mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); @@ -72,7 +72,7 @@ fn avro_rs_448_default_get_record_fields_recursion() -> TestResult { _b: Option<Box<Foo>>, } - let mut named_schemas = HashMap::new(); + let mut named_schemas = HashSet::new(); let fields = get_record_fields_in_ctxt(0, &mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); @@ -110,7 +110,7 @@ fn avro_rs_448_default_get_record_fields_position() -> TestResult { _b: String, } - let mut named_schemas = HashMap::new(); + let mut named_schemas = HashSet::new(); let fields = get_record_fields_in_ctxt(10, &mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); diff --git a/avro_derive/src/lib.rs b/avro_derive/src/lib.rs index 64e8fc9..df14b97 100644 --- a/avro_derive/src/lib.rs +++ b/avro_derive/src/lib.rs @@ -113,11 +113,11 @@ fn create_trait_definition( quote! { #[automatically_derived] impl #impl_generics ::apache_avro::AvroSchemaComponent for #ident #ty_generics #where_clause { - fn get_schema_in_ctxt(named_schemas: &mut ::apache_avro::schema::Names, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::apache_avro::schema::Schema { + fn get_schema_in_ctxt(named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::apache_avro::schema::Schema { #get_schema_impl } - fn get_record_fields_in_ctxt(mut field_position: usize, named_schemas: &mut ::apache_avro::schema::Names, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::std::option::Option<::std::vec::Vec<::apache_avro::schema::RecordField>> { + fn get_record_fields_in_ctxt(mut field_position: usize, named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::std::option::Option<::std::vec::Vec<::apache_avro::schema::RecordField>> { #get_record_fields_impl } } @@ -128,16 +128,12 @@ fn create_trait_definition( fn handle_named_schemas(full_schema_name: String, schema_def: TokenStream) -> TokenStream { quote! { let name = apache_avro::schema::Name::new(#full_schema_name).expect(concat!("Unable to parse schema name ", #full_schema_name)).fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { apache_avro::schema::Schema::Ref{name} } else { let enclosing_namespace = &name.namespace; - // This is needed because otherwise recursive types will recurse forever and cause a stack overflow - // TODO: Breaking change to AvroSchemaComponent, have named_schemas be a set instead - named_schemas.insert(name.clone(), apache_avro::schema::Schema::Ref{name: name.clone()}); - let schema = #schema_def; - named_schemas.insert(name, schema.clone()); - schema + named_schemas.insert(name.clone()); + #schema_def } } } @@ -632,18 +628,17 @@ mod tests { #[automatically_derived] impl ::apache_avro::AvroSchemaComponent for Basic { fn get_schema_in_ctxt( - named_schemas: &mut ::apache_avro::schema::Names, + named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String> ) -> ::apache_avro::schema::Schema { let name = apache_avro::schema::Name::new("Basic") .expect(concat!("Unable to parse schema name ", "Basic")) .fully_qualified_name(enclosing_namespace); - if named_schemas.contains_key(&name) { + if named_schemas.contains(&name) { apache_avro::schema::Schema::Ref { name } } else { let enclosing_namespace = &name.namespace; - named_schemas.insert(name.clone(), apache_avro::schema::Schema::Ref{name: name.clone()}); - let schema = + named_schemas.insert(name.clone()); apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { name: apache_avro::schema::Name::new("Basic").expect( &format!("Unable to parse enum name for schema {}", "Basic")[..] @@ -658,15 +653,13 @@ mod tests { ], default: Some("A".into()), attributes: Default::default(), - }); - named_schemas.insert(name, schema.clone()); - schema + }) } } fn get_record_fields_in_ctxt( mut field_position: usize, - named_schemas: &mut ::apache_avro::schema::Names, + named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String> ) -> ::std::option::Option <::std::vec::Vec<::apache_avro::schema::RecordField>> { None @@ -797,7 +790,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: apache_avro :: schema :: Names , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualified_name (enclosing_namespace) ; if n [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualifi [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -816,7 +809,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_enum) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: apache_avro :: schema :: Names , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualified_name (enclosing_namespace) ; if n [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualifi [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -839,7 +832,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: apache_avro :: schema :: Names , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualified_name (enclosing_namespace) ; if n [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualifi [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -859,7 +852,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_enum) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for B { fn get_schema_in_ctxt (named_schemas : & mut :: apache_avro :: schema :: Names , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("B") . expect (concat ! ("Unable to parse schema name " , "B")) . fully_qualified_name (enclosing_namespace) ; if n [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for B { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("B") . expect (concat ! ("Unable to parse schema name " , "B")) . fully_qualifi [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -883,7 +876,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: apache_avro :: schema :: Names , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualified_name (enclosing_namespace) ; if n [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qualifi [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } diff --git a/avro_derive/tests/derive.rs b/avro_derive/tests/derive.rs index e52ba7b..631e825 100644 --- a/avro_derive/tests/derive.rs +++ b/avro_derive/tests/derive.rs @@ -17,11 +17,15 @@ use apache_avro::{ AvroSchema, AvroSchemaComponent, Reader, Schema, Writer, from_value, - schema::{Alias, EnumSchema, FixedSchema, Name, Names, Namespace, RecordSchema}, + schema::{Alias, EnumSchema, FixedSchema, Name, Namespace, RecordSchema}, }; use proptest::prelude::*; use serde::{Deserialize, Serialize, de::DeserializeOwned}; -use std::{borrow::Cow, collections::HashMap, sync::Mutex}; +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, + sync::Mutex, +}; use pretty_assertions::assert_eq; @@ -1850,14 +1854,14 @@ fn avro_rs_397_with() { ) .unwrap(); - fn long_schema(_named_schemas: &mut Names, _enclosing_namespace: &Namespace) -> Schema { + fn long_schema(_named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace) -> Schema { Schema::Long } mod module { use super::*; pub fn get_schema_in_ctxt( - _named_schemas: &mut Names, + _named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace, ) -> Schema { Schema::Bytes @@ -1902,7 +1906,7 @@ fn avro_rs_397_with_generic() { .unwrap(); fn generic<const N: usize>( - _named_schemas: &mut Names, + _named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace, ) -> Schema { Schema::Fixed(FixedSchema { @@ -1993,7 +1997,7 @@ fn avro_rs_397_derive_with_expr_lambda() { #[test] fn avro_rs_398_transparent_with_skip() { - fn long_schema(_named_schemas: &mut Names, _enclosing_namespace: &Namespace) -> Schema { + fn long_schema(_named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace) -> Schema { Schema::Long } @@ -2293,7 +2297,7 @@ fn avro_rs_448_transparent_with() { pub _a: i32, } - let mut named_schemas = HashMap::new(); + let mut named_schemas = HashSet::new(); assert_eq!( TestStruct::get_record_fields_in_ctxt(0, &mut named_schemas, &None), None @@ -2319,7 +2323,7 @@ fn avro_rs_448_transparent_with_2() { pub _a: Foo, } - let mut named_schemas = HashMap::new(); + let mut named_schemas = HashSet::new(); let fields = TestStruct::get_record_fields_in_ctxt(0, &mut named_schemas, &None).unwrap(); assert!( named_schemas.is_empty(),
