This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch fix/ref_option in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit fb7513f2d33c4cda5ef2700db5d6174bf6d5eaa9 Author: Kriskras99 <[email protected]> AuthorDate: Sat Feb 28 21:10:50 2026 +0100 feat: Rework `Name` to be more performant This is achieved by: - Making `Name` one allocation instead of two - Returning references as much as possible to reduce cloning - `.namespace()` returns a `NamespaceRef<'_>` which is an `Option<&'_ str>` - `fully_qualified_name` returns a `Cow<'_, Name>` which reduces allocations when the name already has a namespace or there is no enclosing namespace. - Allow efficient construction of the name when there is an enclosing namespace - `Display` and `fullname(None)` can now return the full name directly This required changing the `SchemaNameValidator` trait which now needs to return the index of the starting byte of the name. Included in this change is providing default implementations for `*Validator::validate` functions. This allows users to easily implement the trait by just returning a different regex. # Conflicts: # avro/src/schema/mod.rs # avro/src/serde/derive.rs # avro/src/types.rs # avro/src/writer.rs # avro/tests/get_record_fields.rs # avro_derive/src/lib.rs # avro_derive/tests/derive.rs --- avro/src/decode.rs | 17 ++- avro/src/encode.rs | 18 +-- avro/src/reader/block.rs | 6 +- avro/src/reader/mod.rs | 2 +- avro/src/reader/single_object.rs | 2 +- avro/src/schema/mod.rs | 93 ++++++---------- avro/src/schema/name.rs | 191 ++++++++++++++++++++------------ avro/src/schema/parser.rs | 71 ++++++------ avro/src/schema/record/field.rs | 17 ++- avro/src/schema/resolve.rs | 63 ++++++----- avro/src/schema/union.rs | 28 ++--- avro/src/schema_compatibility.rs | 6 +- avro/src/schema_equality.rs | 5 +- avro/src/serde/derive.rs | 109 +++++++++--------- avro/src/serde/ser_schema/mod.rs | 28 ++--- avro/src/serde/with.rs | 50 +++++---- avro/src/types.rs | 60 +++++----- avro/src/util.rs | 12 +- avro/src/validator.rs | 78 ++++++------- avro/src/writer.rs | 14 +-- avro/tests/avro_schema_component.rs | 4 +- 
avro/tests/get_record_fields.rs | 12 +- avro/tests/schema.rs | 38 +++---- avro/tests/serde_human_readable_true.rs | 2 +- avro/tests/validators.rs | 5 +- avro_derive/src/lib.rs | 29 +++-- avro_derive/tests/derive.rs | 50 +++++---- 27 files changed, 508 insertions(+), 502 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index dfa4bd3..cab6797 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -15,17 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::{InnerDecimalSchema, UuidSchema}; +use crate::schema::{InnerDecimalSchema, NamespaceRef, UuidSchema}; use crate::{ AvroResult, Error, bigdecimal::deserialize_big_decimal, decimal::Decimal, duration::Duration, error::Details, - schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, - Schema, - }, + schema::{DecimalSchema, EnumSchema, FixedSchema, Name, RecordSchema, ResolvedSchema, Schema}, types::Value, util::{safe_len, zag_i32, zag_i64}, }; @@ -74,13 +71,13 @@ fn decode_seq_len<R: Read>(reader: &mut R) -> AvroResult<usize> { /// Decode a `Value` from avro format given its `Schema`. 
pub fn decode<R: Read>(schema: &Schema, reader: &mut R) -> AvroResult<Value> { let rs = ResolvedSchema::try_from(schema)?; - decode_internal(schema, rs.get_names(), &None, reader) + decode_internal(schema, rs.get_names(), None, reader) } pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, reader: &mut R, ) -> AvroResult<Value> { match schema { @@ -313,7 +310,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( decode_internal( &field.schema, names, - &fully_qualified_name.namespace, + fully_qualified_name.namespace(), reader, )?, )); @@ -344,11 +341,11 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( decode_internal( resolved.borrow(), names, - &fully_qualified_name.namespace, + fully_qualified_name.namespace(), reader, ) } else { - Err(Details::SchemaResolutionError(fully_qualified_name).into()) + Err(Details::SchemaResolutionError(fully_qualified_name.into_owned()).into()) } } } diff --git a/avro/src/encode.rs b/avro/src/encode.rs index 656509b..2c7eb9f 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::{InnerDecimalSchema, UuidSchema}; +use crate::schema::{InnerDecimalSchema, NamespaceRef, UuidSchema}; use crate::{ AvroResult, bigdecimal::serialize_big_decimal, error::Details, schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, - Schema, SchemaKind, UnionSchema, + DecimalSchema, EnumSchema, FixedSchema, Name, RecordSchema, ResolvedSchema, Schema, + SchemaKind, UnionSchema, }, types::{Value, ValueKind}, util::{zig_i32, zig_i64}, @@ -37,7 +37,7 @@ use std::{borrow::Borrow, collections::HashMap, io::Write}; /// encoding for complex type values. 
pub fn encode<W: Write>(value: &Value, schema: &Schema, writer: &mut W) -> AvroResult<usize> { let rs = ResolvedSchema::try_from(schema)?; - encode_internal(value, schema, rs.get_names(), &None, writer) + encode_internal(value, schema, rs.get_names(), None, writer) } /// Encode `s` as the _bytes_ primitive type. @@ -66,14 +66,16 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( value: &Value, schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, writer: &mut W, ) -> AvroResult<usize> { if let Schema::Ref { name } = schema { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); let resolved = names .get(&fully_qualified_name) - .ok_or(Details::SchemaResolutionError(fully_qualified_name))?; + .ok_or(Details::SchemaResolutionError( + fully_qualified_name.into_owned(), + ))?; return encode_internal(value, resolved.borrow(), names, enclosing_namespace, writer); } @@ -290,7 +292,7 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( .. 
}) = *schema { - let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace; + let record_namespace = name.namespace().or(enclosing_namespace); let mut lookup = HashMap::new(); value_fields.iter().for_each(|(name, field)| { @@ -312,7 +314,7 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( value, &schema_field.schema, names, - &record_namespace, + record_namespace, writer, )?; } else { diff --git a/avro/src/reader/block.rs b/avro/src/reader/block.rs index 1571c06..06c07a2 100644 --- a/avro/src/reader/block.rs +++ b/avro/src/reader/block.rs @@ -188,7 +188,7 @@ impl<'r, R: Read> Block<'r, R> { let item = decode_internal( &self.writer_schema, &self.names_refs, - &None, + None, &mut block_bytes, )?; let item = match read_schema { @@ -221,7 +221,7 @@ impl<'r, R: Read> Block<'r, R> { resolve_names_with_schemata( self.schemata.iter().copied(), &mut names, - &None, + None, &HashMap::new(), )?; self.names_refs = names.into_iter().map(|(n, s)| (n, s.clone())).collect(); @@ -229,7 +229,7 @@ impl<'r, R: Read> Block<'r, R> { } else { self.writer_schema = Schema::parse(&json)?; let mut names = HashMap::new(); - resolve_names(&self.writer_schema, &mut names, &None, &HashMap::new())?; + resolve_names(&self.writer_schema, &mut names, None, &HashMap::new())?; self.names_refs = names.into_iter().map(|(n, s)| (n, s.clone())).collect(); } Ok(()) diff --git a/avro/src/reader/mod.rs b/avro/src/reader/mod.rs index 598e1ff..0113e50 100644 --- a/avro/src/reader/mod.rs +++ b/avro/src/reader/mod.rs @@ -192,7 +192,7 @@ pub fn from_avro_datum_reader_schemata<R: Read>( reader_schemata: Vec<&Schema>, ) -> AvroResult<Value> { let rs = ResolvedSchema::try_from(writer_schemata)?; - let value = decode_internal(writer_schema, rs.get_names(), &None, reader)?; + let value = decode_internal(writer_schema, rs.get_names(), None, reader)?; match reader_schema { Some(schema) => { if reader_schemata.is_empty() { diff --git a/avro/src/reader/single_object.rs 
b/avro/src/reader/single_object.rs index 6fd61d1..c6151a3 100644 --- a/avro/src/reader/single_object.rs +++ b/avro/src/reader/single_object.rs @@ -55,7 +55,7 @@ impl GenericSingleObjectReader { decode_internal( self.write_schema.get_root_schema(), self.write_schema.get_names(), - &None, + None, reader, ) } else { diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index a0f3077..40fc0f9 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -28,7 +28,7 @@ pub(crate) use crate::schema::resolve::{ ResolvedOwnedSchema, resolve_names, resolve_names_with_schemata, }; pub use crate::schema::{ - name::{Alias, Aliases, Name, Names, NamesRef, Namespace}, + name::{Alias, Aliases, Name, Names, NamesRef, Namespace, NamespaceRef}, record::{RecordField, RecordFieldBuilder, RecordSchema, RecordSchemaBuilder}, resolve::ResolvedSchema, union::{UnionSchema, UnionSchemaBuilder}, @@ -381,10 +381,10 @@ impl FixedSchema { S: Serializer, { map.serialize_entry("type", "fixed")?; - if let Some(n) = self.name.namespace.as_ref() { + if let Some(n) = self.name.namespace() { map.serialize_entry("namespace", n)?; } - map.serialize_entry("name", &self.name.name)?; + map.serialize_entry("name", &self.name.name())?; if let Some(docstr) = self.doc.as_ref() { map.serialize_entry("doc", docstr)?; } @@ -406,10 +406,7 @@ impl FixedSchema { /// All other fields are `None` or empty. 
pub(crate) fn copy_only_size(&self) -> Self { Self { - name: Name { - name: String::new(), - namespace: None, - }, + name: Name::invalid_empty_name(), aliases: None, doc: None, size: self.size, @@ -556,7 +553,7 @@ impl Schema { let json = json.as_ref(); let schema: JsonValue = serde_json::from_str(json).map_err(Details::ParseSchemaJson)?; if let JsonValue::Object(inner) = &schema { - let name = Name::parse(inner, &None)?; + let name = Name::parse(inner, None)?; let previous_value = input_schemas.insert(name.clone(), schema); if previous_value.is_some() { return Err(Details::NameCollision(name.fullname(None)).into()); @@ -598,7 +595,7 @@ impl Schema { let json = json.as_ref(); let schema: JsonValue = serde_json::from_str(json).map_err(Details::ParseSchemaJson)?; if let JsonValue::Object(inner) = &schema { - let name = Name::parse(inner, &None)?; + let name = Name::parse(inner, None)?; if let Some(_previous) = input_schemas.insert(name.clone(), schema) { return Err(Details::NameCollision(name.fullname(None)).into()); } @@ -615,7 +612,7 @@ impl Schema { parser.parse_input_schemas()?; let value = serde_json::from_str(schema).map_err(Details::ParseSchemaJson)?; - let schema = parser.parse(&value, &None)?; + let schema = parser.parse(&value, None)?; let schemata = parser.parse_list()?; Ok((schema, schemata)) } @@ -632,14 +629,14 @@ impl Schema { /// Parses an Avro schema from JSON. pub fn parse(value: &JsonValue) -> AvroResult<Schema> { let mut parser = Parser::default(); - parser.parse(value, &None) + parser.parse(value, None) } /// Parses an Avro schema from JSON. /// Any `Schema::Ref`s must be known in the `names` map. pub(crate) fn parse_with_names(value: &JsonValue, names: Names) -> AvroResult<Schema> { let mut parser = Parser::new(HashMap::with_capacity(1), Vec::with_capacity(1), names); - parser.parse(value, &None) + parser.parse(value, None) } /// Returns the custom attributes (metadata) if the schema supports them. 
@@ -695,8 +692,8 @@ impl Schema { } /// Returns the namespace of the schema if it has one. - pub fn namespace(&self) -> Namespace { - self.name().and_then(|n| n.namespace.clone()) + pub fn namespace(&self) -> NamespaceRef<'_> { + self.name().and_then(|n| n.namespace()) } /// Returns the aliases of the schema if it has ones. @@ -871,10 +868,10 @@ impl Serialize for Schema { }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "record")?; - if let Some(ref n) = name.namespace { + if let Some(ref n) = name.namespace() { map.serialize_entry("namespace", n)?; } - map.serialize_entry("name", &name.name)?; + map.serialize_entry("name", &name.name())?; if let Some(docstr) = doc { map.serialize_entry("doc", docstr)?; } @@ -897,10 +894,10 @@ impl Serialize for Schema { }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "enum")?; - if let Some(ref n) = name.namespace { + if let Some(ref n) = name.namespace() { map.serialize_entry("namespace", n)?; } - map.serialize_entry("name", &name.name)?; + map.serialize_entry("name", &name.name())?; map.serialize_entry("symbols", symbols)?; if let Some(aliases) = aliases { @@ -1763,10 +1760,7 @@ mod tests { lookup.insert("next".to_owned(), 1); let expected = Schema::Record(RecordSchema { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + name: Name::new("LongList")?, aliases: Some(vec![Alias::new("LinkedLongs").unwrap()]), doc: None, fields: vec![ @@ -1776,15 +1770,12 @@ mod tests { .build(), RecordField::builder() .name("next".to_string()) - .schema(Schema::Union(UnionSchema::new(vec![ + .schema(Schema::union(vec![ Schema::Null, Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + name: Name::new("LongList")?, }, - ])?)) + ])?) 
.build(), ], lookup, @@ -1819,10 +1810,7 @@ mod tests { lookup.insert("next".to_owned(), 1); let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, + name: Name::new("record")?, aliases: None, doc: None, fields: vec![ @@ -1833,10 +1821,7 @@ mod tests { RecordField::builder() .name("next".to_string()) .schema(Schema::Ref { - name: Name { - name: "record".to_owned(), - namespace: None, - }, + name: Name::new("record")?, }) .build(), ], @@ -1879,10 +1864,7 @@ mod tests { lookup.insert("next".to_owned(), 1); let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, + name: Name::new("record")?, aliases: None, doc: None, fields: vec![ @@ -1945,20 +1927,14 @@ mod tests { lookup.insert("next".to_owned(), 1); let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, + name: Name::new("record")?, aliases: None, doc: None, fields: vec![ RecordField::builder() .name("fixed".to_string()) .schema(Schema::Fixed(FixedSchema { - name: Name { - name: "fixed".to_owned(), - namespace: None, - }, + name: Name::new("fixed")?, aliases: None, doc: None, size: 456, @@ -2246,8 +2222,8 @@ mod tests { let schema = Schema::parse_str(schema)?; if let Schema::Record(RecordSchema { name, .. }) = schema { - assert_eq!(name.name, "name"); - assert_eq!(name.namespace, Some("space".to_string())); + assert_eq!(name.name(), "name"); + assert_eq!(name.namespace(), Some("space")); } else { panic!("Expected a record schema!"); } @@ -2273,7 +2249,7 @@ mod tests { let schema = Schema::parse_str(schema)?; if let Schema::Record(RecordSchema { name, .. }) = schema { - assert_eq!(name.namespace, Some("space1".to_string())); + assert_eq!(name.namespace(), Some("space1")); } else { panic!("Expected a record schema!"); } @@ -2299,7 +2275,7 @@ mod tests { let schema = Schema::parse_str(schema)?; if let Schema::Record(RecordSchema { name, .. 
}) = schema { - assert_eq!(name.namespace, Some("space2".to_string())); + assert_eq!(name.namespace(), Some("space2")); } else { panic!("Expected a record schema!"); } @@ -3132,9 +3108,9 @@ mod tests { assert_eq!(canonical_form, expected); let name = Name::new("my_name")?; - let fullname = name.fullname(Some("".to_string())); + let fullname = name.fullname(Some("")); assert_eq!(fullname, "my_name"); - let qname = name.fully_qualified_name(&Some("".to_string())).to_string(); + let qname = name.fully_qualified_name(Some("")).to_string(); assert_eq!(qname, "my_name"); Ok(()) @@ -3483,7 +3459,7 @@ mod tests { let name = Name::new(".my_name")?; let fullname = name.fullname(None); assert_eq!(fullname, "my_name"); - let qname = name.fully_qualified_name(&None).to_string(); + let qname = name.fully_qualified_name(None).to_string(); assert_eq!(qname, "my_name"); Ok(()) @@ -4385,7 +4361,7 @@ mod tests { }) ); assert_logged( - r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, size: 6, .. })"#, + r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", .. }, size: 6, .. })"#, ); Ok(()) @@ -4487,10 +4463,7 @@ mod tests { let mut lookup = BTreeMap::new(); lookup.insert("value".to_owned(), 0); Schema::Record(RecordSchema { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + name: Name::new("LongList")?, aliases: Some(vec![Alias::new("LinkedLongs").unwrap()]), doc: None, fields: vec -#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[derive(Clone, Hash, PartialEq, Eq)] pub struct Name { - pub name: String, - pub namespace: Namespace, + /// The full name + namespace_and_name: String, + /// Start byte of the name part + /// + /// If this is zero, then there is no namespace. 
+ index_of_name: usize, } /// Represents the aliases for Named Schema @@ -52,72 +57,90 @@ pub type Names = HashMap<Name, Schema>; pub type NamesRef<'a> = HashMap<Name, &'a Schema>; /// Represents the namespace for Named Schema pub type Namespace = Option<String>; +/// Represents the namespace for Named Schema +pub type NamespaceRef<'a> = Option<&'a str>; impl Name { /// Create a new `Name`. /// Parses the optional `namespace` from the `name` string. /// `aliases` will not be defined. - pub fn new(name: &str) -> AvroResult<Self> { - let (name, namespace) = Name::get_name_and_namespace(name)?; - Ok(Self { - name, - namespace: namespace.filter(|ns| !ns.is_empty()), - }) + pub fn new(name: impl Into<String> + AsRef<str>) -> AvroResult<Self> { + Self::new_with_enclosing_namespace(name, None) } - fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { - validate_schema_name(name) + /// Create a new `Name` using the namespace from `enclosing_namespace` if absent. + pub fn new_with_enclosing_namespace( + name: impl Into<String> + AsRef<str>, + enclosing_namespace: NamespaceRef, + ) -> AvroResult<Self> { + // Having both `Into<String>` and `AsRef<str>` allows optimal use in both of these cases: + // - `name` is a `String`. We can reuse the allocation when `enclosing_namespace` is `None` + // or `name` already has a namespace. + // - `name` is a `str`. With only `Into<String` we need an extra allocation in the case `name` + // doesn't have namespace and `enclosing_namespace` is `Some`. Having `AsRef<str>` allows + // skipping that allocation. 
+ let name_ref = name.as_ref(); + let index_of_name = validate_schema_name(name_ref)?; + + if index_of_name == 0 + && let Some(namespace) = enclosing_namespace + && !namespace.is_empty() + { + validate_namespace(namespace)?; + Ok(Self { + namespace_and_name: format!("{namespace}.{name_ref}"), + index_of_name: namespace.len() + 1, + }) + } else if index_of_name == 1 { + // Name has a leading dot + Ok(Self { + namespace_and_name: name.as_ref()[1..].into(), + index_of_name: 0, + }) + } else { + Ok(Self { + namespace_and_name: name.into(), + index_of_name, + }) + } } /// Parse a `serde_json::Value` into a `Name`. pub(crate) fn parse( complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Self> { - let (name, namespace_from_name) = complex - .name() - .map(|name| Name::get_name_and_namespace(name.as_str()).unwrap()) - .ok_or(Details::GetNameField)?; - // FIXME Reading name from the type is wrong ! The name there is just a metadata (AVRO-3430) - let type_name = match complex.get("type") { - Some(Value::Object(complex_type)) => complex_type.name().or(None), - _ => None, - }; - - let namespace = namespace_from_name - .or_else(|| { - complex - .string("namespace") - .or_else(|| enclosing_namespace.clone()) - }) - .filter(|ns| !ns.is_empty()); + let name_field = complex.name().ok_or(Details::GetNameField)?; + Self::new_with_enclosing_namespace( + name_field, + complex.string("namespace").or(enclosing_namespace), + ) + } - if let Some(ref ns) = namespace { - validate_namespace(ns)?; - } + pub fn name(&self) -> &str { + &self.namespace_and_name[self.index_of_name..] 
+ } - Ok(Self { - name: type_name.unwrap_or(name), - namespace, - }) + pub fn namespace(&self) -> NamespaceRef<'_> { + if self.index_of_name == 0 { + None + } else { + Some(&self.namespace_and_name[..(self.index_of_name - 1)]) + } } /// Return the `fullname` of this `Name` /// /// More information about fullnames can be found in the /// [Avro specification](https://avro.apache.org/docs/++version++/specification/#names) - pub fn fullname(&self, default_namespace: Namespace) -> String { - if self.name.contains('.') { - self.name.clone() + pub fn fullname(&self, enclosing_namespace: NamespaceRef) -> String { + if self.index_of_name == 0 + && let Some(namespace) = enclosing_namespace + && !namespace.is_empty() + { + format!("{namespace}.{}", self.namespace_and_name) } else { - let namespace = self.namespace.clone().or(default_namespace); - - match namespace { - Some(ref namespace) if !namespace.is_empty() => { - format!("{}.{}", namespace, self.name) - } - _ => self.name.clone(), - } + self.namespace_and_name.clone() } } @@ -126,22 +149,39 @@ impl Name { /// ``` /// # use apache_avro::{Error, schema::Name}; /// assert_eq!( - /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), + /// Name::new("some_name")?.fully_qualified_name(Some("some_namespace")).into_owned(), /// Name::new("some_namespace.some_name")? /// ); /// assert_eq!( - /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), + /// Name::new("some_namespace.some_name")?.fully_qualified_name(Some("other_namespace")).into_owned(), /// Name::new("some_namespace.some_name")? 
/// ); /// # Ok::<(), Error>(()) /// ``` - pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> Name { - Name { - name: self.name.clone(), - namespace: self - .namespace - .clone() - .or_else(|| enclosing_namespace.clone().filter(|ns| !ns.is_empty())), + pub fn fully_qualified_name(&self, enclosing_namespace: NamespaceRef) -> Cow<'_, Name> { + if self.index_of_name == 0 + && let Some(namespace) = enclosing_namespace + && !namespace.is_empty() + { + Cow::Owned(Self { + namespace_and_name: format!("{namespace}.{}", self.namespace_and_name), + index_of_name: namespace.len() + 1, + }) + } else { + Cow::Borrowed(self) + } + } + + /// Create an empty name. + /// + /// This name is invalid and should never be used anywhere! The only valid use is filling + /// a `Name` field that will not be used. + /// + /// Using this name will cause a panic. + pub(crate) fn invalid_empty_name() -> Self { + Self { + namespace_and_name: String::new(), + index_of_name: usize::MAX, } } } @@ -170,9 +210,22 @@ impl FromStr for Name { } } -impl fmt::Display for Name { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.fullname(None)[..]) +impl Debug for Name { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut debug = f.debug_struct("Name"); + debug.field("name", &self.name()); + if self.index_of_name != 0 { + debug.field("namespace", &self.namespace()); + debug.finish() + } else { + debug.finish_non_exhaustive() + } + } +} + +impl Display for Name { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.namespace_and_name) } } @@ -184,7 +237,7 @@ impl<'de> Deserialize<'de> for Name { Value::deserialize(deserializer).and_then(|value| { use serde::de::Error; if let Value::Object(json) = value { - Name::parse(&json, &None).map_err(Error::custom) + Name::parse(&json, None).map_err(Error::custom) } else { Err(Error::custom(format!("Expected a JSON object: {value:?}"))) } @@ -203,18 +256,18 @@ impl Alias { 
} pub fn name(&self) -> &str { - &self.0.name + self.0.name() } - pub fn namespace(&self) -> &Namespace { - &self.0.namespace + pub fn namespace(&self) -> NamespaceRef<'_> { + self.0.namespace() } - pub fn fullname(&self, default_namespace: Namespace) -> String { - self.0.fullname(default_namespace) + pub fn fullname(&self, enclosing_namespace: NamespaceRef) -> String { + self.0.fullname(enclosing_namespace) } - pub fn fully_qualified_name(&self, default_namespace: &Namespace) -> Name { + pub fn fully_qualified_name(&self, default_namespace: NamespaceRef) -> Cow<'_, Name> { self.0.fully_qualified_name(default_namespace) } } @@ -263,8 +316,8 @@ mod tests { /// Zero-length namespace is considered as no-namespace. fn test_namespace_from_name_with_empty_value() -> TestResult { let name = Name::new(".name")?; - assert_eq!(name.name, "name"); - assert_eq!(name.namespace, None); + assert_eq!(name.namespace_and_name, "name"); + assert_eq!(name.index_of_name, 0); Ok(()) } diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs index 6011d82..0f89018 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -18,7 +18,7 @@ use crate::error::Details; use crate::schema::{ Alias, Aliases, ArraySchema, DecimalMetadata, DecimalSchema, EnumSchema, FixedSchema, - MapSchema, Name, Names, Namespace, Precision, RecordField, RecordSchema, Scale, Schema, + MapSchema, Name, Names, NamespaceRef, Precision, RecordField, RecordSchema, Scale, Schema, SchemaKind, UnionSchema, UuidSchema, }; use crate::types; @@ -61,7 +61,7 @@ impl Parser { /// Create a `Schema` from a string representing a JSON Avro schema. pub(super) fn parse_str(&mut self, input: &str) -> AvroResult<Schema> { let value = serde_json::from_str(input).map_err(Details::ParseSchemaJson)?; - self.parse(&value, &None) + self.parse(&value, None) } /// Create an array of `Schema`s from an iterator of JSON Avro schemas. 
@@ -94,7 +94,7 @@ impl Parser { .input_schemas .remove_entry(&next_name) .expect("Key unexpectedly missing"); - let parsed = self.parse(&value, &None)?; + let parsed = self.parse(&value, None)?; self.parsed_schemas .insert(self.get_schema_type_name(name, value), parsed); } @@ -105,7 +105,7 @@ impl Parser { pub(super) fn parse( &mut self, value: &Value, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { match *value { Value::String(ref t) => self.parse_known_schema(t.as_str(), enclosing_namespace), @@ -119,7 +119,7 @@ impl Parser { fn parse_known_schema( &mut self, name: &str, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { match name { "null" => Ok(Schema::Null), @@ -147,7 +147,7 @@ impl Parser { pub(super) fn fetch_schema_ref( &mut self, name: &str, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { fn get_schema_ref(parsed: &Schema) -> Schema { match parsed { @@ -160,8 +160,7 @@ impl Parser { } } - let name = Name::new(name)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let fully_qualified_name = Name::new_with_enclosing_namespace(name, enclosing_namespace)?; if self.parsed_schemas.contains_key(&fully_qualified_name) { return Ok(Schema::Ref { @@ -173,9 +172,11 @@ impl Parser { } // For good error reporting we add this check - match name.name.as_str() { + match fully_qualified_name.name() { "record" | "enum" | "fixed" => { - return Err(Details::InvalidSchemaRecord(name.to_string()).into()); + return Err( + Details::InvalidSchemaRecord(fully_qualified_name.name().to_string()).into(), + ); } _ => (), } @@ -194,9 +195,11 @@ impl Parser { })?; // parsing a full schema from inside another schema. 
Other full schema will not inherit namespace - let parsed = self.parse(&value, &None)?; - self.parsed_schemas - .insert(self.get_schema_type_name(name, value), parsed.clone()); + let parsed = self.parse(&value, None)?; + self.parsed_schemas.insert( + self.get_schema_type_name(fully_qualified_name, value), + parsed.clone(), + ); Ok(get_schema_ref(&parsed)) } @@ -248,13 +251,13 @@ impl Parser { pub(super) fn parse_complex( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { // Try to parse this as a native complex type. fn parse_as_native_complex( complex: &Map<String, Value>, parser: &mut Parser, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { match complex.get("type") { Some(value) => match value { @@ -476,11 +479,11 @@ impl Parser { self.resolving_schemas .insert(name.clone(), resolving_schema.clone()); - let namespace = &name.namespace; + let namespace = name.namespace(); if let Some(aliases) = aliases { aliases.iter().for_each(|alias| { - let alias_fullname = alias.fully_qualified_name(namespace); + let alias_fullname = alias.fully_qualified_name(namespace).into_owned(); self.resolving_schemas .insert(alias_fullname, resolving_schema.clone()); }); @@ -499,13 +502,14 @@ impl Parser { .insert(fully_qualified_name.clone(), schema.clone()); self.resolving_schemas.remove(fully_qualified_name); - let namespace = &fully_qualified_name.namespace; + let namespace = fully_qualified_name.namespace(); if let Some(aliases) = aliases { aliases.iter().for_each(|alias| { let alias_fullname = alias.fully_qualified_name(namespace); self.resolving_schemas.remove(&alias_fullname); - self.parsed_schemas.insert(alias_fullname, schema.clone()); + self.parsed_schemas + .insert(alias_fullname.into_owned(), schema.clone()); }); } } @@ -514,13 +518,12 @@ impl Parser { fn get_already_seen_schema( &self, complex: &Map<String, Value>, - enclosing_namespace: 
&Namespace, + enclosing_namespace: NamespaceRef, ) -> Option<&Schema> { match complex.get("type") { Some(Value::String(typ)) => { - let name = Name::new(typ.as_str()) - .unwrap() - .fully_qualified_name(enclosing_namespace); + let name = + Name::new_with_enclosing_namespace(typ.as_str(), enclosing_namespace).unwrap(); self.resolving_schemas .get(&name) .or_else(|| self.parsed_schemas.get(&name)) @@ -533,7 +536,7 @@ impl Parser { fn parse_record( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { let fields_opt = complex.get("fields"); @@ -545,7 +548,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); let mut lookup = BTreeMap::new(); @@ -607,7 +610,7 @@ impl Parser { fn parse_enum( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { let symbols_opt = complex.get("symbols"); @@ -619,7 +622,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); let symbols: Vec<String> = symbols_opt .and_then(|v| v.as_array()) @@ -684,7 +687,7 @@ impl Parser { fn parse_array( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { let items = complex .get("items") @@ -721,7 +724,7 @@ impl Parser { fn parse_map( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { let types = complex .get("values") @@ -759,7 +762,7 @@ impl 
Parser { fn parse_union( &mut self, items: &[Value], - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { items .iter() @@ -787,7 +790,7 @@ impl Parser { fn parse_fixed( &mut self, complex: &Map<String, Value>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> AvroResult<Schema> { let size_opt = complex.get("size"); if size_opt.is_none() @@ -810,7 +813,7 @@ impl Parser { let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; let aliases = - self.fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); + self.fix_aliases_namespace(complex.aliases(), fully_qualified_name.namespace()); let schema = Schema::Fixed(FixedSchema { name: fully_qualified_name.clone(), @@ -833,7 +836,7 @@ impl Parser { fn fix_aliases_namespace( &self, aliases: Option<Vec<String>>, - namespace: &Namespace, + namespace: NamespaceRef, ) -> Aliases { aliases.map(|aliases| { aliases @@ -856,7 +859,7 @@ impl Parser { fn get_schema_type_name(&self, name: Name, value: Value) -> Name { match value.get("type") { Some(Value::Object(complex_type)) => match complex_type.name() { - Some(name) => Name::new(name.as_str()).unwrap(), + Some(name) => Name::new(name).unwrap(), _ => name, }, _ => name, diff --git a/avro/src/schema/record/field.rs b/avro/src/schema/record/field.rs index 043a51c..11326e1 100644 --- a/avro/src/schema/record/field.rs +++ b/avro/src/schema/record/field.rs @@ -89,10 +89,10 @@ impl RecordField { ) -> AvroResult<Self> { let name = field.name().ok_or(Details::GetNameFieldFromRecord)?; - validate_record_field_name(&name)?; + validate_record_field_name(name)?; let ty = field.get("type").ok_or(Details::GetRecordFieldTypeField)?; - let schema = parser.parse(ty, &enclosing_record.namespace)?; + let schema = parser.parse(ty, enclosing_record.namespace())?; if let Some(logical_type) = field.get("logicalType") { warn!( @@ -103,7 +103,7 @@ impl RecordField { let default = 
field.get("default").cloned(); Self::resolve_default_value( &schema, - &name, + name, &enclosing_record.fullname(None), parser.get_parsed_schemas(), &default, @@ -123,7 +123,7 @@ impl RecordField { .unwrap_or_default(); Ok(RecordField { - name, + name: name.into(), doc: field.doc(), default, aliases, @@ -147,7 +147,7 @@ impl RecordField { let resolved = schemas.iter().any(|schema| { avro_value .to_owned() - .resolve_internal(schema, names, &schema.namespace(), &None) + .resolve_internal(schema, names, schema.namespace(), &None) .is_ok() }); @@ -165,7 +165,7 @@ impl RecordField { } _ => { let resolved = avro_value - .resolve_internal(field_schema, names, &field_schema.namespace(), &None) + .resolve_internal(field_schema, names, field_schema.namespace(), &None) .is_ok(); if !resolved { @@ -250,10 +250,7 @@ mod tests { .schema(Schema::Union(UnionSchema::new(vec![ Schema::Null, Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + name: Name::new("LongList")?, }, ])?)) .build(); diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs index 9274e61..449436b 100644 --- a/avro/src/schema/resolve.rs +++ b/avro/src/schema/resolve.rs @@ -17,8 +17,8 @@ use crate::error::Details; use crate::schema::{ - DecimalSchema, EnumSchema, FixedSchema, InnerDecimalSchema, NamesRef, Namespace, RecordSchema, - UnionSchema, UuidSchema, + DecimalSchema, EnumSchema, FixedSchema, InnerDecimalSchema, NamesRef, NamespaceRef, + RecordSchema, UnionSchema, UuidSchema, }; use crate::{AvroResult, Error, Schema}; use std::collections::HashMap; @@ -51,7 +51,7 @@ impl<'s> ResolvedSchema<'s> { /// These schemas will be resolved in order, so references to schemas later in the /// list is not supported. 
pub fn new_with_schemata(schemata: Vec<&'s Schema>) -> AvroResult<Self> { - Self::new_with_known_schemata(schemata, &None, &HashMap::new()) + Self::new_with_known_schemata(schemata, None, &HashMap::new()) } /// Creates `ResolvedSchema` with some already known schemas. @@ -59,7 +59,7 @@ impl<'s> ResolvedSchema<'s> { /// Those schemata would be used to resolve references if needed. pub fn new_with_known_schemata<'n>( schemata_to_resolve: Vec<&'s Schema>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, known_schemata: &'n NamesRef<'n>, ) -> AvroResult<Self> { let mut names = HashMap::new(); @@ -116,7 +116,7 @@ impl ResolvedOwnedSchema { root_schema, names_builder: |schema: &Schema| { let mut names = HashMap::new(); - resolve_names(schema, &mut names, &None, &HashMap::new())?; + resolve_names(schema, &mut names, None, &HashMap::new())?; Ok::<_, Error>(names) }, } @@ -146,7 +146,7 @@ impl TryFrom<Schema> for ResolvedOwnedSchema { pub fn resolve_names<'s, 'n>( schema: &'s Schema, names: &mut NamesRef<'s>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, known_schemata: &NamesRef<'n>, ) -> AvroResult<()> { match schema { @@ -170,7 +170,7 @@ pub fn resolve_names<'s, 'n>( .. }) | Schema::Duration(FixedSchema { name, .. }) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let fully_qualified_name = name.fully_qualified_name(enclosing_namespace).into_owned(); if names.contains_key(&fully_qualified_name) || known_schemata.contains_key(&fully_qualified_name) { @@ -181,16 +181,21 @@ pub fn resolve_names<'s, 'n>( } } Schema::Record(RecordSchema { name, fields, .. 
}) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let fully_qualified_name = name.fully_qualified_name(enclosing_namespace).into_owned(); if names.contains_key(&fully_qualified_name) || known_schemata.contains_key(&fully_qualified_name) { Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into()) } else { - let record_namespace = fully_qualified_name.namespace.clone(); + let record_namespace = fully_qualified_name.namespace().map(ToString::to_string); names.insert(fully_qualified_name, schema); for field in fields { - resolve_names(&field.schema, names, &record_namespace, known_schemata)? + resolve_names( + &field.schema, + names, + record_namespace.as_deref(), + known_schemata, + )? } Ok(()) } @@ -202,7 +207,7 @@ pub fn resolve_names<'s, 'n>( { Ok(()) } else { - Err(Details::SchemaResolutionError(fully_qualified_name).into()) + Err(Details::SchemaResolutionError(fully_qualified_name.into_owned()).into()) } } _ => Ok(()), @@ -212,7 +217,7 @@ pub fn resolve_names<'s, 'n>( pub fn resolve_names_with_schemata<'s, 'n>( schemata: impl IntoIterator<Item = &'s Schema>, names: &mut NamesRef<'s>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, known_schemata: &NamesRef<'n>, ) -> AvroResult<()> { for schema in schemata { @@ -265,7 +270,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_record_name"] { + for s in ["space.record_name", "space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -306,7 +311,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_record_name"] { + for s in ["space.record_name", "space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -342,7 +347,7 @@ mod tests { let 
schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_enum_name"] { + for s in ["space.record_name", "space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -378,7 +383,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_enum_name"] { + for s in ["space.record_name", "space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -414,7 +419,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_fixed_name"] { + for s in ["space.record_name", "space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -450,7 +455,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_fixed_name"] { + for s in ["space.record_name", "space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -492,7 +497,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_record_name"] { + for s in ["space.record_name", "inner_space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -529,7 +534,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_enum_name"] { + for s in ["space.record_name", "inner_space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -566,7 +571,7 @@ mod tests { let 
schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_fixed_name"] { + for s in ["space.record_name", "inner_space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -619,7 +624,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 3); - for s in &[ + for s in [ "space.record_name", "space.middle_record_name", "space.inner_record_name", @@ -677,7 +682,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 3); - for s in &[ + for s in [ "space.record_name", "middle_namespace.middle_record_name", "middle_namespace.inner_record_name", @@ -736,7 +741,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 3); - for s in &[ + for s in [ "space.record_name", "middle_namespace.middle_record_name", "inner_namespace.inner_record_name", @@ -781,7 +786,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.in_array_record"] { + for s in ["space.record_name", "space.in_array_record"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -822,7 +827,7 @@ mod tests { let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::new(&schema)?; assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.in_map_record"] { + for s in ["space.record_name", "space.in_map_record"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -861,7 +866,7 @@ mod tests { // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_enum_name"] { + for s in ["space.record_name", "inner_space.inner_enum_name"] { 
assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -905,7 +910,7 @@ mod tests { // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_fixed_name"] { + for s in ["space.record_name", "inner_space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); } @@ -1015,7 +1020,7 @@ mod tests { let mut known_schemata: NamesRef = HashMap::default(); known_schemata.insert("duplicated_name".try_into()?, &Schema::Boolean); - let result = ResolvedSchema::new_with_known_schemata(vec![&schema], &None, &known_schemata) + let result = ResolvedSchema::new_with_known_schemata(vec![&schema], None, &known_schemata) .unwrap_err(); assert_eq!( diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs index 938e765..31affe7 100644 --- a/avro/src/schema/union.rs +++ b/avro/src/schema/union.rs @@ -17,7 +17,7 @@ use crate::error::Details; use crate::schema::{ - DecimalSchema, InnerDecimalSchema, Name, Namespace, Schema, SchemaKind, UuidSchema, + DecimalSchema, InnerDecimalSchema, Name, NamespaceRef, Schema, SchemaKind, UuidSchema, }; use crate::types; use crate::{AvroResult, Error}; @@ -88,7 +88,7 @@ impl UnionSchema { &self, value: &types::Value, known_schemata: Option<&HashMap<Name, S>>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Option<(usize, &Schema)> { let known_schemata_if_none = HashMap::new(); let known_schemata = known_schemata.unwrap_or(&known_schemata_if_none); @@ -101,11 +101,7 @@ impl UnionSchema { let kind = schema.discriminant(); // Maps and arrays need to be checked if they actually match the value if kind == SchemaKind::Map || kind == SchemaKind::Array { - let namespace = if schema.namespace().is_some() { - &schema.namespace() - } else { - enclosing_namespace - }; + let namespace = schema.namespace().or(enclosing_namespace); // TODO: Do this without the clone value @@ -129,11 +125,7 @@ impl UnionSchema { s_kind == kind || s_kind == 
SchemaKind::Ref }) .find(|(_i, schema)| { - let namespace = if schema.namespace().is_some() { - &schema.namespace() - } else { - enclosing_namespace - }; + let namespace = schema.namespace().or(enclosing_namespace); // TODO: Do this without the clone value @@ -151,11 +143,7 @@ impl UnionSchema { (None, None) => { // Slow path, check if value can be promoted to any of the types in the union self.schemas.iter().enumerate().find(|(_i, schema)| { - let namespace = if schema.namespace().is_some() { - &schema.namespace() - } else { - enclosing_namespace - }; + let namespace = schema.namespace().or(enclosing_namespace); // TODO: Do this without the clone value @@ -575,7 +563,7 @@ mod tests { assert!( union - .find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, &None) + .find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, None) .is_none() ); @@ -588,7 +576,7 @@ mod tests { let value = Value::Int(42); assert_eq!( - union.find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, &None), + union.find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, None), Some((0, &Schema::Long)) ); @@ -609,7 +597,7 @@ mod tests { let value = Value::Fixed(16, vec![0; 16]); assert_eq!( - union.find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, &None), + union.find_schema_with_known_schemata(&value, None::<&HashMap<Name, Schema>>, None), Some((0, &uuid)) ); diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index b172d94..8baafd3 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -182,11 +182,11 @@ impl Checker { // Compare unqualified names if the schemas have them if let Some(w_name) = writers_schema.name() && let Some(r_name) = readers_schema.name() - && w_name.name != r_name.name + && w_name.name() != r_name.name() { return Err(CompatibilityError::NameMismatch { - writer_name: w_name.name.clone(), - reader_name: r_name.name.clone(), 
+ writer_name: w_name.name().into(), + reader_name: r_name.name().into(), }); } diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index aaa2e7e..0143a22 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -696,10 +696,7 @@ mod tests { let string = Schema::Uuid(UuidSchema::String); let bytes = Schema::Uuid(UuidSchema::Bytes); let mut fixed_schema = FixedSchema { - name: Name { - name: "some_name".to_string(), - namespace: None, - }, + name: Name::new("some_name")?, aliases: None, doc: None, size: 16, diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index e01f112..d979962 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -17,7 +17,7 @@ use crate::Schema; use crate::schema::{ - FixedSchema, Name, Namespace, RecordField, RecordSchema, UnionSchema, UuidSchema, + FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema, }; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; @@ -190,15 +190,15 @@ use std::collections::{HashMap, HashSet}; /// /// 1. In combination with `#[serde(with = "path::to::module)]` /// -/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, &Namespace) -> Schema` -/// and `fn get_record_fields_in_ctxt(usize, &mut HashSet<Name>, &Namespace) -> Option<Vec<RecordField>>` in the module provided +/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut HashSet<Name>, NamespaceRef) -> Schema` +/// and `fn get_record_fields_in_ctxt(usize, &mut HashSet<Name>, NamespaceRef) -> Option<Vec<RecordField>>` in the module provided /// to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those /// functions. /// /// 2. By providing a function directly, `#[avro(with = some_fn)]`. /// /// To get the schema, it will call the function provided. It must have the signature -/// `fn(&mut HashSet<Name>, &Namespace) -> Schema`. 
When this is used for a `transparent` struct, the +/// `fn(&mut HashSet<Name>, NamespaceRef) -> Schema`. When this is used for a `transparent` struct, the /// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used. /// This is only recommended for primitive types, as the default implementation cannot be efficiently /// implemented for complex types. @@ -223,16 +223,16 @@ pub trait AvroSchema { /// /// For example, you have a custom integer type: /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}}; /// # use std::collections::HashSet; /// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods /// pub struct U24([u8; 3]); /// impl AvroSchemaComponent for U24 { -/// fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { /// Schema::Int /// } /// -/// fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { /// None // A Schema::Int is not a Schema::Record so there are no fields to return /// } /// @@ -248,18 +248,18 @@ pub trait AvroSchema { /// To construct a schema for a type is "transparent", such as for smart pointers, simply /// pass through the arguments to the inner type: /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField}}; /// # use serde::{Serialize, Deserialize}; /// # use std::collections::HashSet; /// #[derive(Serialize, Deserialize)] /// #[serde(transparent)] // This attribute is important for all passthrough 
implementations! /// pub struct Transparent<T>(T); /// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> { -/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema { /// T::get_schema_in_ctxt(named_schemas, enclosing_namespace) /// } /// -/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> { /// T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace) /// } /// @@ -283,7 +283,7 @@ pub trait AvroSchema { /// - Implement `field_default()` if you want to use `#[serde(skip_serializing{,_if})]`. /// /// ``` -/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Namespace, RecordField, RecordSchema}}; +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, NamespaceRef, RecordField, RecordSchema}}; /// # use serde::{Serialize, Deserialize}; /// # use std::{time::Duration, collections::HashSet}; /// pub struct Foo { @@ -293,13 +293,13 @@ pub trait AvroSchema { /// } /// /// impl AvroSchemaComponent for Foo { -/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { +/// fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema { /// // Create the fully qualified name for your type given the enclosing namespace -/// let name = Name::new("Foo").unwrap().fully_qualified_name(enclosing_namespace); +/// let name = Name::new_with_enclosing_namespace("Foo", enclosing_namespace).expect("Name is valid"); /// if named_schemas.contains(&name) { /// Schema::Ref { name } /// } else { -/// let enclosing_namespace = &name.namespace; +/// let enclosing_namespace = 
name.namespace(); /// // Do this before you start creating the schema, as otherwise recursive types will cause infinite recursion. /// named_schemas.insert(name.clone()); /// let schema = Schema::Record(RecordSchema::builder() @@ -311,7 +311,7 @@ pub trait AvroSchema { /// } /// } /// -/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> { /// Some(vec![ /// RecordField::builder() /// .name("one") @@ -338,7 +338,7 @@ pub trait AvroSchemaComponent { /// Get the schema for this component fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema; /// Get the fields of this schema if it is a record. @@ -349,7 +349,7 @@ pub trait AvroSchemaComponent { /// implement this function when manually implementing this trait. 
fn get_record_fields_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Option<Vec<RecordField>> { get_record_fields_in_ctxt(named_schemas, enclosing_namespace, Self::get_schema_in_ctxt) } @@ -370,8 +370,8 @@ pub trait AvroSchemaComponent { /// This is public so the derive macro can use it for `#[avro(with = ||)]` and `#[avro(with = path)]` pub fn get_record_fields_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, - schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema, + enclosing_namespace: NamespaceRef, + schema_fn: fn(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema, ) -> Option<Vec<RecordField>> { let mut record = match schema_fn(named_schemas, enclosing_namespace) { Schema::Record(record) => record, @@ -485,18 +485,18 @@ where T: AvroSchemaComponent + ?Sized, { fn get_schema() -> Schema { - T::get_schema_in_ctxt(&mut HashSet::default(), &None) + T::get_schema_in_ctxt(&mut HashSet::default(), None) } } macro_rules! impl_schema ( ($type:ty, $variant_constructor:expr) => ( impl AvroSchemaComponent for $type { - fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { + fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { $variant_constructor } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -521,11 +521,11 @@ impl_schema!((), Schema::Null); macro_rules! 
impl_passthrough_schema ( ($type:ty where T: AvroSchemaComponent + ?Sized $(+ $bound:tt)*) => ( impl<T: AvroSchemaComponent $(+ $bound)* + ?Sized> AvroSchemaComponent for $type { - fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema { T::get_schema_in_ctxt(named_schemas, enclosing_namespace) } - fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Option<Vec<RecordField>> { T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace) } @@ -545,11 +545,11 @@ impl_passthrough_schema!(std::sync::Mutex<T> where T: AvroSchemaComponent + ?Siz macro_rules! impl_array_schema ( ($type:ty where T: AvroSchemaComponent) => ( impl<T: AvroSchemaComponent> AvroSchemaComponent for $type { - fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: &Namespace) -> Schema { + fn get_schema_in_ctxt(named_schemas: &mut HashSet<Name>, enclosing_namespace: NamespaceRef) -> Schema { Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -567,12 +567,12 @@ where { fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -583,12 
+583,12 @@ where { fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)).build() } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -599,7 +599,7 @@ where { fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { let variants = vec![ Schema::Null, @@ -611,7 +611,7 @@ where ) } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } @@ -626,11 +626,10 @@ impl AvroSchemaComponent for core::time::Duration { /// This is a lossy conversion as this Avro type does not store the amount of nanoseconds. fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new("duration") - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = Name::new_with_enclosing_namespace("duration", enclosing_namespace) + .expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -646,7 +645,7 @@ impl AvroSchemaComponent for core::time::Duration { } } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -657,11 +656,10 @@ impl AvroSchemaComponent for uuid::Uuid { /// The underlying schema is [`Schema::Fixed`] with a size of 16. 
fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new("uuid") - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = + Name::new_with_enclosing_namespace("uuid", enclosing_namespace).expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -677,7 +675,7 @@ impl AvroSchemaComponent for uuid::Uuid { } } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -686,11 +684,10 @@ impl AvroSchemaComponent for u64 { /// The schema is [`Schema::Fixed`] of size 8 with the name `u64`. fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new("u64") - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = + Name::new_with_enclosing_namespace("u64", enclosing_namespace).expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -706,7 +703,7 @@ impl AvroSchemaComponent for u64 { } } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -715,11 +712,10 @@ impl AvroSchemaComponent for u128 { /// The schema is [`Schema::Fixed`] of size 16 with the name `u128`. 
fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new("u128") - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = + Name::new_with_enclosing_namespace("u128", enclosing_namespace).expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -735,7 +731,7 @@ impl AvroSchemaComponent for u128 { } } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } @@ -744,11 +740,10 @@ impl AvroSchemaComponent for i128 { /// The schema is [`Schema::Fixed`] of size 16 with the name `i128`. fn get_schema_in_ctxt( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new("i128") - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = + Name::new_with_enclosing_namespace("i128", enclosing_namespace).expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -764,7 +759,7 @@ impl AvroSchemaComponent for i128 { } } - fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Option<Vec<RecordField>> { + fn get_record_fields_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Option<Vec<RecordField>> { None } } diff --git a/avro/src/serde/ser_schema/mod.rs b/avro/src/serde/ser_schema/mod.rs index b49f059..50db278 100644 --- a/avro/src/serde/ser_schema/mod.rs +++ b/avro/src/serde/ser_schema/mod.rs @@ -27,7 +27,7 @@ use crate::{ }; use bigdecimal::BigDecimal; use serde::{Serialize, ser}; -use std::{borrow::Cow, cmp::Ordering, collections::HashMap, io::Write, str::FromStr}; +use std::{cmp::Ordering, collections::HashMap, io::Write, str::FromStr}; const COLLECTION_SERIALIZER_ITEM_LIMIT: usize = 1024; const 
COLLECTION_SERIALIZER_DEFAULT_INIT_ITEM_CAPACITY: usize = 32; @@ -629,13 +629,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { } fn get_ref_schema(&self, name: &'s Name) -> Result<&'s Schema, Error> { - let full_name = match name.namespace { - Some(_) => Cow::Borrowed(name), - None => Cow::Owned(Name { - name: name.name.clone(), - namespace: self.enclosing_namespace.clone(), - }), - }; + let full_name = name.fully_qualified_name(self.enclosing_namespace.as_deref()); let ref_schema = self.names.get(full_name.as_ref()).copied(); @@ -795,7 +789,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { }; match schema { - Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name == "i128" => { + Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name() == "i128" => { self.writer .write_all(&value.to_le_bytes()) .map_err(Details::WriteBytes)?; @@ -804,7 +798,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { Schema::Union(union_schema) => { for (i, variant_schema) in union_schema.schemas.iter().enumerate() { match variant_schema { - Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name == "i128" => { + Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name() == "i128" => { encode_int(i as i32, &mut *self.writer)?; return self.serialize_i128_with_schema(value, variant_schema); } @@ -949,7 +943,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { i64::try_from(value).map_err(|cause| create_error(cause.to_string()))?; encode_long(long_value, &mut self.writer) } - Schema::Fixed(fixed) if fixed.size == 8 && fixed.name.name == "u64" => { + Schema::Fixed(fixed) if fixed.size == 8 && fixed.name.name() == "u64" => { self.writer .write_all(&value.to_le_bytes()) .map_err(Details::WriteBytes)?; @@ -972,7 +966,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { encode_int(i as i32, &mut *self.writer)?; return self.serialize_u64_with_schema(value, variant_schema); } - Schema::Fixed(fixed) if fixed.size == 8 && fixed.name.name == 
"u64" => { + Schema::Fixed(fixed) if fixed.size == 8 && fixed.name.name() == "u64" => { encode_int(i as i32, &mut *self.writer)?; return self.serialize_u64_with_schema(value, variant_schema); } @@ -998,7 +992,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { }; match schema { - Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name == "u128" => { + Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name() == "u128" => { self.writer .write_all(&value.to_le_bytes()) .map_err(Details::WriteBytes)?; @@ -1007,7 +1001,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { Schema::Union(union_schema) => { for (i, variant_schema) in union_schema.schemas.iter().enumerate() { match variant_schema { - Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name == "u128" => { + Schema::Fixed(fixed) if fixed.size == 16 && fixed.name.name() == "u128" => { encode_int(i as i32, &mut *self.writer)?; return self.serialize_u128_with_schema(value, variant_schema); } @@ -1108,7 +1102,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { match schema { Schema::String | Schema::Bytes => self.write_bytes(String::from(value).as_bytes()), - Schema::Fixed(fixed) if fixed.size == 4 && fixed.name.name == "char" => { + Schema::Fixed(fixed) if fixed.size == 4 && fixed.name.name() == "char" => { self.writer .write_all(&u32::from(value).to_le_bytes()) .map_err(Details::WriteBytes)?; @@ -1121,7 +1115,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { encode_int(i as i32, &mut *self.writer)?; return self.serialize_char_with_schema(value, variant_schema); } - Schema::Fixed(fixed) if fixed.size == 4 && fixed.name.name == "char" => { + Schema::Fixed(fixed) if fixed.size == 4 && fixed.name.name() == "char" => { encode_int(i as i32, &mut *self.writer)?; return self.serialize_char_with_schema(value, variant_schema); } @@ -1791,7 +1785,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { for (i, variant_schema) in union_schema.schemas.iter().enumerate() { match 
variant_schema { Schema::Record(inner) - if inner.fields.len() == len && inner.name.name == name => + if inner.fields.len() == len && inner.name.name() == name => { encode_int(i as i32, &mut *self.writer)?; return self.serialize_struct_with_schema(name, len, variant_schema); diff --git a/avro/src/serde/with.rs b/avro/src/serde/with.rs index 9d236d1..29744e1 100644 --- a/avro/src/serde/with.rs +++ b/avro/src/serde/with.rs @@ -98,13 +98,14 @@ pub mod bytes { use serde::{Deserializer, Serializer}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{Name, Namespace, RecordField}, + schema::{Name, RecordField}, }; /// Returns [`Schema::Bytes`] - pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { Schema::Bytes } @@ -112,7 +113,7 @@ pub mod bytes { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } @@ -163,13 +164,14 @@ pub mod bytes_opt { use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{Name, Namespace, RecordField, UnionSchema}, + schema::{Name, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` - pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { Schema::Union( UnionSchema::new(vec![Schema::Null, Schema::Bytes]).expect("This is a valid union"), ) @@ -179,7 +181,7 @@ pub mod bytes_opt { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } @@ -232,19 +234,22 @@ pub mod fixed { use super::BytesType; use serde::{Deserializer, Serializer}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{FixedSchema, Name, Namespace, 
RecordField}, + schema::{FixedSchema, Name, RecordField}, }; /// Returns `Schema::Fixed(N)` named `serde_avro_fixed_{N}` pub fn get_schema_in_ctxt<const N: usize>( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { - let name = Name::new(&format!("serde_avro_fixed_{N}")) - .expect("Name is valid") - .fully_qualified_name(enclosing_namespace); + let name = Name::new_with_enclosing_namespace( + format!("serde_avro_fixed_{N}"), + enclosing_namespace, + ) + .expect("Name is valid"); if named_schemas.contains(&name) { Schema::Ref { name } } else { @@ -258,7 +263,7 @@ pub mod fixed { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } @@ -309,15 +314,16 @@ pub mod fixed_opt { use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{Name, Namespace, RecordField, UnionSchema}, + schema::{Name, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Fixed(N))` where the fixed schema is named `serde_avro_fixed_{N}` pub fn get_schema_in_ctxt<const N: usize>( named_schemas: &mut HashSet<Name>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Schema { Schema::Union( UnionSchema::new(vec![ @@ -332,7 +338,7 @@ pub mod fixed_opt { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } @@ -387,13 +393,14 @@ pub mod slice { use serde::{Deserializer, Serializer}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{Name, Namespace, RecordField}, + schema::{Name, RecordField}, }; /// Returns [`Schema::Bytes`] - pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { Schema::Bytes } @@ 
-401,7 +408,7 @@ pub mod slice { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } @@ -455,13 +462,14 @@ pub mod slice_opt { use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; + use crate::schema::NamespaceRef; use crate::{ Schema, - schema::{Name, Namespace, RecordField, UnionSchema}, + schema::{Name, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` - pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: &Namespace) -> Schema { + pub fn get_schema_in_ctxt(_: &mut HashSet<Name>, _: NamespaceRef) -> Schema { Schema::Union( UnionSchema::new(vec![Schema::Null, Schema::Bytes]).expect("This is a valid union"), ) @@ -471,7 +479,7 @@ pub mod slice_opt { pub fn get_record_fields_in_ctxt( _: usize, _: &mut HashSet<Name>, - _: &Namespace, + _: NamespaceRef, ) -> Option<Vec<RecordField>> { None } diff --git a/avro/src/types.rs b/avro/src/types.rs index 0b23dc9..90ea2b0 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -16,7 +16,7 @@ // under the License. //! Logic handling the intermediate representation of Avro values. 
-use crate::schema::{InnerDecimalSchema, UuidSchema}; +use crate::schema::{InnerDecimalSchema, NamespaceRef, UuidSchema}; use crate::{ AvroResult, Error, bigdecimal::{deserialize_big_decimal, serialize_big_decimal}, @@ -24,8 +24,8 @@ use crate::{ duration::Duration, error::Details, schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision, RecordField, - RecordSchema, ResolvedSchema, Scale, Schema, SchemaKind, UnionSchema, + DecimalSchema, EnumSchema, FixedSchema, Name, Precision, RecordField, RecordSchema, + ResolvedSchema, Scale, Schema, SchemaKind, UnionSchema, }, }; use bigdecimal::BigDecimal; @@ -383,7 +383,7 @@ impl Value { schemata.iter().any(|schema| { let enclosing_namespace = schema.namespace(); - match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) { + match self.validate_internal(schema, rs.get_names(), enclosing_namespace) { Some(reason) => { let log_message = format!("Invalid value: {self:?} for schema: {schema:?}. Reason: {reason}"); @@ -409,11 +409,11 @@ impl Value { } /// Validates the value against the provided schema. 
- pub(crate) fn validate_internal<S: std::borrow::Borrow<Schema> + Debug>( + pub(crate) fn validate_internal<S: Borrow<Schema> + Debug>( &self, schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Option<String> { match (self, schema) { (_, Schema::Ref { name }) => { @@ -426,7 +426,7 @@ impl Value { names.keys() )) }, - |s| self.validate_internal(s.borrow(), names, &name.namespace), + |s| self.validate_internal(s.borrow(), names, name.namespace()), ) } (&Value::Null, &Schema::Null) => None, @@ -607,11 +607,7 @@ impl Value { record_fields .iter() .fold(None, |acc, (field_name, record_field)| { - let record_namespace = if name.namespace.is_none() { - enclosing_namespace - } else { - &name.namespace - }; + let record_namespace = name.namespace().or(enclosing_namespace); match lookup.get(field_name) { Some(idx) => { let field = &fields[*idx]; @@ -678,14 +674,14 @@ impl Value { } else { ResolvedSchema::try_from(schemata)? }; - self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None) + self.resolve_internal(schema, rs.get_names(), enclosing_namespace, &None) } pub(crate) fn resolve_internal<S: Borrow<Schema> + Debug>( mut self, schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, field_default: &Option<JsonValue>, ) -> AvroResult<Self> { // Check if this schema is a union, and if the reader schema is not. 
@@ -705,10 +701,10 @@ impl Value { if let Some(resolved) = names.get(&name) { debug!("Resolved {name:?}"); - self.resolve_internal(resolved.borrow(), names, &name.namespace, field_default) + self.resolve_internal(resolved.borrow(), names, name.namespace(), field_default) } else { error!("Failed to resolve schema {name:?}"); - Err(Details::SchemaResolutionError(name.clone()).into()) + Err(Details::SchemaResolutionError(name.into_owned()).into()) } } Schema::Null => self.resolve_null(), @@ -1082,7 +1078,7 @@ impl Value { self, schema: &UnionSchema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, field_default: &Option<JsonValue>, ) -> Result<Self, Error> { let v = match self { @@ -1108,7 +1104,7 @@ impl Value { self, schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Result<Self, Error> { match self { Value::Array(items) => Ok(Value::Array( @@ -1129,7 +1125,7 @@ impl Value { self, schema: &Schema, names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Result<Self, Error> { match self { Value::Map(items) => Ok(Value::Map( @@ -1154,7 +1150,7 @@ impl Value { self, fields: &[RecordField], names: &HashMap<Name, S>, - enclosing_namespace: &Namespace, + enclosing_namespace: NamespaceRef, ) -> Result<Self, Error> { let mut items = match self { Value::Map(items) => Ok(items), @@ -1395,7 +1391,7 @@ mod tests { attributes: BTreeMap::new(), }), false, - r#"Invalid value: Fixed(11, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) for schema: Duration(FixedSchema { name: Name { name: "TestName", namespace: None }, size: 12, .. }). Reason: The value's size ('11') must be exactly 12 to be a Duration"#, + r#"Invalid value: Fixed(11, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) for schema: Duration(FixedSchema { name: Name { name: "TestName", .. }, size: 12, .. }). 
Reason: The value's size ('11') must be exactly 12 to be a Duration"#, ), ( Value::Record(vec![("unknown_field_name".to_string(), Value::Null)]), @@ -1413,7 +1409,7 @@ mod tests { attributes: Default::default(), }), false, - r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, fields: [RecordField { name: "field_name", schema: Int, .. }], .. }). Reason: There is no schema field for field 'unknown_field_name'"#, + r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", .. }, fields: [RecordField { name: "field_name", schema: Int, .. }], .. }). Reason: There is no schema field for field 'unknown_field_name'"#, ), ( Value::Record(vec![("field_name".to_string(), Value::Null)]), @@ -1433,13 +1429,12 @@ mod tests { attributes: Default::default(), }), false, - r#"Invalid value: Record([("field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, fields: [RecordField { name: "field_name", schema: Ref { name: Name { name: "missing", namespace: None } }, .. }], .. }). Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. Parsed names: []"#, + r#"Invalid value: Record([("field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", .. }, fields: [RecordField { name: "field_name", schema: Ref { name: Name { name: "missing", .. } }, .. }], .. }). Reason: Unresolved schema reference: 'Name { name: "missing", .. }'. 
Parsed names: []"#, ), ]; for (value, schema, valid, expected_err_message) in value_schema_valid.into_iter() { - let err_message = - value.validate_internal::<Schema>(&schema, &HashMap::default(), &None); + let err_message = value.validate_internal::<Schema>(&schema, &HashMap::default(), None); assert_eq!(valid, err_message.is_none()); if !valid { let full_err_message = format!( @@ -1633,7 +1628,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: Unsupported value-schema combination! Value: Boolean(false), schema: Long"#, + r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", .. }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: Unsupported value-schema combination! Value: Boolean(false), schema: Long"#, ); let value = Value::Record(vec![ @@ -1642,7 +1637,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). 
Reason: Could not find matching type in union"#, + r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", .. }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: Could not find matching type in union"#, ); assert_not_logged( r#"Invalid value: String("foo") for schema: Int. Reason: Unsupported value-schema combination"#, @@ -1654,7 +1649,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: There is no schema field for field 'd'"#, + r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", .. }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: There is no schema field for field 'd'"#, ); let value = Value::Record(vec![ @@ -1665,7 +1660,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). 
Reason: The value's records length (4) is greater than the schema's (3 fields)"#, + r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record(RecordSchema { name: Name { name: "some_record", .. }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: The value's records length (4) is greater than the schema's (3 fields)"#, ); assert!( @@ -1689,7 +1684,7 @@ mod tests { .validate(&schema) ); assert_logged( - r#"Invalid value: Map({"d": Long(123)}) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). Reason: Field with name '"a"' is not a member of the map items + r#"Invalid value: Map({"d": Long(123)}) for schema: Record(RecordSchema { name: Name { name: "some_record", .. }, fields: [RecordField { name: "a", schema: Long, .. }, RecordField { name: "b", schema: String, .. }, RecordField { name: "c", default: Null, schema: Union(UnionSchema { schemas: [Null, Int] }), .. }], .. }). 
Reason: Field with name '"a"' is not a member of the map items Field with name '"b"' is not a member of the map items"#, ); @@ -1959,10 +1954,7 @@ Field with name '"b"' is not a member of the map items"#, value .clone() .resolve(&Schema::Uuid(UuidSchema::Fixed(FixedSchema { - name: Name { - name: "some_name".to_string(), - namespace: None - }, + name: Name::new("some_name")?, aliases: None, doc: None, size: 16, diff --git a/avro/src/util.rs b/avro/src/util.rs index 3127f8d..8728c9e 100644 --- a/avro/src/util.rs +++ b/avro/src/util.rs @@ -41,24 +41,22 @@ pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false; pub(crate) static SERDE_HUMAN_READABLE: OnceLock<bool> = OnceLock::new(); pub(crate) trait MapHelper { - fn string(&self, key: &str) -> Option<String>; + fn string(&self, key: &str) -> Option<&str>; - fn name(&self) -> Option<String> { + fn name(&self) -> Option<&str> { self.string("name") } fn doc(&self) -> Documentation { - self.string("doc") + self.string("doc").map(Into::into) } fn aliases(&self) -> Option<Vec<String>>; } impl MapHelper for Map<String, Value> { - fn string(&self, key: &str) -> Option<String> { - self.get(key) - .and_then(|v| v.as_str()) - .map(|v| v.to_string()) + fn string(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.as_str()) } fn aliases(&self) -> Option<Vec<String>> { diff --git a/avro/src/validator.rs b/avro/src/validator.rs index 0162bce..9165b3e 100644 --- a/avro/src/validator.rs +++ b/avro/src/validator.rs @@ -17,27 +17,33 @@ //! # Custom name validation //! -//! By default, the library follows the rules specified in the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). +//! By default, the library follows the rules specified in the [Avro specification](https://avro.apache.org/docs/++version++/specification/#names). //! //! Some of the other Apache Avro language SDKs are more flexible in their name validation. For //! 
interoperability with those SDKs, the library provides a way to customize the name validation. //! //! ``` -//! use apache_avro::AvroResult; -//! use apache_avro::schema::Namespace; -//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; +//! # use apache_avro::{AvroResult, validator::{SchemaNameValidator, set_schema_name_validator}}; +//! # use regex_lite::Regex; +//! # use std::sync::OnceLock; +//! struct DontAllowNamespaces; //! -//! struct MyCustomValidator; -//! -//! impl SchemaNameValidator for MyCustomValidator { -//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { -//! todo!() +//! impl SchemaNameValidator for DontAllowNamespaces { +//! fn regex(&self) -> &'static Regex { +//! static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); +//! SCHEMA_NAME_ONCE.get_or_init(|| { +//! Regex::new( +//! // Disallows any namespace. By naming the group `name`, the default +//! // implementation of `SchemaNameValidator::validate` can be reused. +//! r"^(?P<name>[A-Za-z_][A-Za-z0-9_]*)$", +//! ).expect("Regex is valid") +//! }) //! } //! } //! //! // don't parse any schema before registering the custom validator(s)! //! -//! if set_schema_name_validator(Box::new(MyCustomValidator)).is_err() { +//! if set_schema_name_validator(Box::new(DontAllowNamespaces)).is_err() { //! // `.unwrap()` doesn't work as the return type does not implement `Debug` //! panic!("There was already a schema validator configured") //! } @@ -51,7 +57,7 @@ //! If the application parses schemas before setting a validator, the default validator will be //! registered and used! -use crate::{AvroResult, error::Details, schema::Namespace}; +use crate::{AvroResult, error::Details}; use log::debug; use regex_lite::Regex; use std::sync::OnceLock; @@ -65,6 +71,9 @@ struct SpecificationValidator; pub trait SchemaNameValidator: Send + Sync { /// The regex used to validate the schema name. 
/// + /// When the name part of the full name is provided as a capture group named `name`, the + /// default implementation of [`Self::validate`] can be used. + /// /// The default implementation uses the Avro specified regex. fn regex(&self) -> &'static Regex { static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); @@ -77,25 +86,26 @@ pub trait SchemaNameValidator: Send + Sync { }) } - /// Validates the schema name and returns the name and the optional namespace. + /// Validates the schema name and returns the start byte of the name. + /// + /// Requires that the implementation of [`Self::regex`] provides a capture group named `name` + /// that captures the name part of the full name. /// /// Should return [`Details::InvalidSchemaName`] if it is invalid. - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>; -} - -impl SchemaNameValidator for SpecificationValidator { - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { + fn validate(&self, schema_name: &str) -> AvroResult<usize> { let regex = SchemaNameValidator::regex(self); let caps = regex .captures(schema_name) .ok_or_else(|| Details::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?; - Ok(( - caps["name"].to_string(), - caps.name("namespace").map(|s| s.as_str().to_string()), - )) + Ok(caps + .name("name") + .expect("Regex has no group named `name`") + .start()) } } +impl SchemaNameValidator for SpecificationValidator {} + static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> = OnceLock::new(); /// Sets a custom schema name validator. 
@@ -111,7 +121,7 @@ pub fn set_schema_name_validator( NAME_VALIDATOR_ONCE.set(validator) } -pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> { +pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<usize> { NAME_VALIDATOR_ONCE .get_or_init(|| { debug!("Going to use the default name validator."); @@ -137,20 +147,18 @@ pub trait SchemaNamespaceValidator: Send + Sync { /// Validates a schema namespace. /// /// Should return [`Details::InvalidNamespace`] if it is invalid. - fn validate(&self, namespace: &str) -> AvroResult<()>; -} - -impl SchemaNamespaceValidator for SpecificationValidator { - fn validate(&self, ns: &str) -> AvroResult<()> { + fn validate(&self, namespace: &str) -> AvroResult<()> { let regex = SchemaNamespaceValidator::regex(self); - if !regex.is_match(ns) { - Err(Details::InvalidNamespace(ns.to_string(), regex.as_str()).into()) + if !regex.is_match(namespace) { + Err(Details::InvalidNamespace(namespace.to_string(), regex.as_str()).into()) } else { Ok(()) } } } +impl SchemaNamespaceValidator for SpecificationValidator {} + static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Send + Sync>> = OnceLock::new(); @@ -190,10 +198,6 @@ pub trait EnumSymbolNameValidator: Send + Sync { /// Validate the symbol of an enum. /// /// Should return [`Details::EnumSymbolName`] if it is invalid. 
- fn validate(&self, name: &str) -> AvroResult<()>; -} - -impl EnumSymbolNameValidator for SpecificationValidator { fn validate(&self, symbol: &str) -> AvroResult<()> { let regex = EnumSymbolNameValidator::regex(self); if !regex.is_match(symbol) { @@ -204,6 +208,8 @@ impl EnumSymbolNameValidator for SpecificationValidator { } } +impl EnumSymbolNameValidator for SpecificationValidator {} + static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator + Send + Sync>> = OnceLock::new(); @@ -243,10 +249,6 @@ pub trait RecordFieldNameValidator: Send + Sync { /// Validate the name of a record field. /// /// Should return [`Details::FieldName`] if it is invalid. - fn validate(&self, name: &str) -> AvroResult<()>; -} - -impl RecordFieldNameValidator for SpecificationValidator { fn validate(&self, field_name: &str) -> AvroResult<()> { let regex = RecordFieldNameValidator::regex(self); if !regex.is_match(field_name) { @@ -257,6 +259,8 @@ impl RecordFieldNameValidator for SpecificationValidator { } } +impl RecordFieldNameValidator for SpecificationValidator {} + static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidator + Send + Sync>> = OnceLock::new(); diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 3c9bf86..fe264a1 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -201,7 +201,7 @@ impl<'a, W: Write> Writer<'a, W> { if let Some(reason) = value.validate_internal( self.schema, self.resolved_schema.get_names(), - &self.schema.namespace(), + self.schema.namespace(), ) { return Err(Details::ValidationWithReason { value: value.clone(), @@ -246,7 +246,7 @@ impl<'a, W: Write> Writer<'a, W> { value, self.schema, self.resolved_schema.get_names(), - &self.schema.namespace(), + self.schema.namespace(), &mut self.buffer, )?; @@ -641,10 +641,10 @@ fn write_avro_datum_schemata<T: Into<Value>>( let rs = ResolvedSchema::try_from(schemata)?; let names = rs.get_names(); let enclosing_namespace = schema.namespace(); - if let 
Some(_err) = avro.validate_internal(schema, names, &enclosing_namespace) { + if let Some(_err) = avro.validate_internal(schema, names, enclosing_namespace) { return Err(Details::Validation.into()); } - encode_internal(&avro, schema, names, &enclosing_namespace, buffer) + encode_internal(&avro, schema, names, enclosing_namespace, buffer) } /// Writer that encodes messages according to the single object encoding v1 spec @@ -811,7 +811,7 @@ fn write_value_ref_owned_resolved<W: Write>( if let Some(reason) = value.validate_internal( root_schema, resolved_schema.get_names(), - &root_schema.namespace(), + root_schema.namespace(), ) { return Err(Details::ValidationWithReason { value: value.clone(), @@ -824,7 +824,7 @@ fn write_value_ref_owned_resolved<W: Write>( value, root_schema, resolved_schema.get_names(), - &root_schema.namespace(), + root_schema.namespace(), writer, ) } @@ -1785,7 +1785,7 @@ mod tests { Err(e) => { assert_eq!( e.to_string(), - r#"Failed to serialize field 'time' for record Record(RecordSchema { name: Name { name: "Conference", namespace: None }, fields: [RecordField { name: "name", schema: String, .. }, RecordField { name: "date", aliases: ["time2", "time"], schema: Union(UnionSchema { schemas: [Null, Long] }), .. }], .. }): Failed to serialize value of type f64 using schema Union(UnionSchema { schemas: [Null, Long] }): 12345678.9. Cause: Cannot find a Double schema in [Null, Long]"# + r#"Failed to serialize field 'time' for record Record(RecordSchema { name: Name { name: "Conference", .. }, fields: [RecordField { name: "name", schema: String, .. }, RecordField { name: "date", aliases: ["time2", "time"], schema: Union(UnionSchema { schemas: [Null, Long] }), .. }], .. }): Failed to serialize value of type f64 using schema Union(UnionSchema { schemas: [Null, Long] }): 12345678.9. 
Cause: Cannot find a Double schema in [Null, Long]"# ); } } diff --git a/avro/tests/avro_schema_component.rs b/avro/tests/avro_schema_component.rs index 596f35f..ca9e31a 100644 --- a/avro/tests/avro_schema_component.rs +++ b/avro/tests/avro_schema_component.rs @@ -20,7 +20,7 @@ use std::collections::HashSet; #[test] fn avro_rs_394_avro_schema_component_without_derive_feature() { - let schema = i32::get_schema_in_ctxt(&mut HashSet::default(), &None); + let schema = i32::get_schema_in_ctxt(&mut HashSet::default(), None); assert!(matches!(schema, Schema::Int)); } @@ -29,5 +29,5 @@ fn avro_rs_394_avro_schema_component_without_derive_feature() { fn avro_rs_394_avro_schema_component_nested_options() { type VeryOptional = Option<Option<i32>>; - let _schema = VeryOptional::get_schema_in_ctxt(&mut HashSet::default(), &None); + let _schema = VeryOptional::get_schema_in_ctxt(&mut HashSet::default(), None); } diff --git a/avro/tests/get_record_fields.rs b/avro/tests/get_record_fields.rs index 765fda6..5bab721 100644 --- a/avro/tests/get_record_fields.rs +++ b/avro/tests/get_record_fields.rs @@ -33,7 +33,7 @@ fn avro_rs_448_default_get_record_fields_no_recursion() -> TestResult { let mut named_schemas = HashSet::new(); let fields = - get_record_fields_in_ctxt(&mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); + get_record_fields_in_ctxt(&mut named_schemas, None, Foo::get_schema_in_ctxt).unwrap(); assert_eq!(fields.len(), 2); assert!( @@ -42,7 +42,7 @@ fn avro_rs_448_default_get_record_fields_no_recursion() -> TestResult { ); // Insert Foo into named_schemas - match Foo::get_schema_in_ctxt(&mut named_schemas, &None) { + match Foo::get_schema_in_ctxt(&mut named_schemas, None) { Schema::Record(_) => {} schema => panic!("Expected a record got {schema:?}"), } @@ -53,7 +53,7 @@ fn avro_rs_448_default_get_record_fields_no_recursion() -> TestResult { ); let fields = - get_record_fields_in_ctxt(&mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); + 
get_record_fields_in_ctxt(&mut named_schemas, None, Foo::get_schema_in_ctxt).unwrap(); assert_eq!(fields.len(), 2); assert_eq!( named_schemas.len(), @@ -74,7 +74,7 @@ fn avro_rs_448_default_get_record_fields_recursion() -> TestResult { let mut named_schemas = HashSet::new(); let fields = - get_record_fields_in_ctxt(&mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); + get_record_fields_in_ctxt(&mut named_schemas, None, Foo::get_schema_in_ctxt).unwrap(); assert_eq!(fields.len(), 2); assert_eq!( @@ -84,14 +84,14 @@ fn avro_rs_448_default_get_record_fields_recursion() -> TestResult { ); // Insert Foo into named_schemas - match Foo::get_schema_in_ctxt(&mut named_schemas, &None) { + match Foo::get_schema_in_ctxt(&mut named_schemas, None) { Schema::Ref { name: _ } => {} schema => panic!("Expected a ref got {schema:?}"), } assert_eq!(named_schemas.len(), 1); let fields = - get_record_fields_in_ctxt(&mut named_schemas, &None, Foo::get_schema_in_ctxt).unwrap(); + get_record_fields_in_ctxt(&mut named_schemas, None, Foo::get_schema_in_ctxt).unwrap(); assert_eq!(fields.len(), 2); assert_eq!( named_schemas.len(), diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index e7fb61f..3347e83 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -73,7 +73,7 @@ fn test_correct_recursive_extraction() -> TestResult { .. 
}) = &inner_fields[0].schema { - assert_eq!("X", recursive_type.name.as_str()); + assert_eq!("X", recursive_type.name()); } } else { panic!("inner schema {inner_schema:?} should have been a record") @@ -598,8 +598,8 @@ fn test_fullname_name_and_namespace_specified() -> TestResult { fn test_fullname_fullname_and_namespace_specified() -> TestResult { init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); + assert_eq!(name.name(), "d"); + assert_eq!(name.namespace(), Some("a.b.c")); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); Ok(()) @@ -609,9 +609,9 @@ fn test_fullname_fullname_and_namespace_specified() -> TestResult { fn test_fullname_name_and_default_namespace_specified() -> TestResult { init(); let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, None); - let fullname = name.fullname(Some("b.c.d".into())); + assert_eq!(name.name(), "a"); + assert_eq!(name.namespace(), None); + let fullname = name.fullname(Some("b.c.d")); assert_eq!("b.c.d.a", fullname); Ok(()) } @@ -620,9 +620,9 @@ fn test_fullname_name_and_default_namespace_specified() -> TestResult { fn test_fullname_fullname_and_default_namespace_specified() -> TestResult { init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); + assert_eq!(name.name(), "d"); + assert_eq!(name.namespace(), Some("a.b.c")); + let fullname = name.fullname(Some("o.a.h")); assert_eq!("a.b.c.d", fullname); Ok(()) } @@ -631,8 +631,8 @@ fn test_fullname_fullname_and_default_namespace_specified() -> TestResult { fn test_avro_3452_parsing_name_without_namespace() -> TestResult { init(); let name: Name = 
serde_json::from_str(r#"{"name": "a.b.c.d"}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); + assert_eq!(name.name(), "d"); + assert_eq!(name.namespace(), Some("a.b.c")); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); Ok(()) @@ -642,8 +642,8 @@ fn test_avro_3452_parsing_name_without_namespace() -> TestResult { fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() -> TestResult { init(); let name: Name = serde_json::from_str(r#"{"name": ".a"}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, None); + assert_eq!(name.name(), "a"); + assert_eq!(name.namespace(), None); assert_eq!("a", name.fullname(None)); Ok(()) } @@ -662,9 +662,9 @@ fn test_fullname_fullname_namespace_and_default_namespace_specified() -> TestRes init(); let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); + assert_eq!(name.name(), "d"); + assert_eq!(name.namespace(), Some("a.b.c")); + let fullname = name.fullname(Some("o.a.h")); assert_eq!("a.b.c.d", fullname); Ok(()) } @@ -674,9 +674,9 @@ fn test_fullname_name_namespace_and_default_namespace_specified() -> TestResult init(); let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, Some("o.a.a".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); + assert_eq!(name.name(), "a"); + assert_eq!(name.namespace(), Some("o.a.a")); + let fullname = name.fullname(Some("o.a.h")); assert_eq!("o.a.a.a", fullname); Ok(()) } diff --git a/avro/tests/serde_human_readable_true.rs b/avro/tests/serde_human_readable_true.rs index c71a91e..d1c71d7 100644 --- a/avro/tests/serde_human_readable_true.rs +++ b/avro/tests/serde_human_readable_true.rs @@ -129,7 +129,7 @@ fn 
avro_rs_440_uuid_fixed() -> TestResult { let writer = SpecificSingleObjectWriter::new()?; assert_eq!( writer.write(uuid, &mut buffer).unwrap_err().to_string(), - r#"Failed to serialize value of type string using schema Uuid(Fixed(FixedSchema { name: Name { name: "uuid", namespace: None }, size: 16, .. })): 550e8400-e29b-41d4-a716-446655440000. Cause: Expected bytes but got a string. Did you mean to use `Schema::Uuid(UuidSchema::String)` or `utils::serde_set_human_readable(false)`?"# + r#"Failed to serialize value of type string using schema Uuid(Fixed(FixedSchema { name: Name { name: "uuid", .. }, size: 16, .. })): 550e8400-e29b-41d4-a716-446655440000. Cause: Expected bytes but got a string. Did you mean to use `Schema::Uuid(UuidSchema::String)` or `utils::serde_set_human_readable(false)`?"# ); Ok(()) diff --git a/avro/tests/validators.rs b/avro/tests/validators.rs index 55659be..3325c1f 100644 --- a/avro/tests/validators.rs +++ b/avro/tests/validators.rs @@ -17,7 +17,6 @@ use apache_avro::{ AvroResult, - schema::Namespace, validator::{ EnumSymbolNameValidator, RecordFieldNameValidator, SchemaNameValidator, SchemaNamespaceValidator, set_enum_symbol_name_validator, set_record_field_name_validator, @@ -32,8 +31,8 @@ struct CustomValidator; // because the parsing will trigger the validation and will // setup the default validator (SpecificationValidator)! impl SchemaNameValidator for CustomValidator { - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { - Ok((schema_name.to_string(), None)) + fn validate(&self, _schema_name: &str) -> AvroResult<usize> { + Ok(0) } } diff --git a/avro_derive/src/lib.rs b/avro_derive/src/lib.rs index 51d0117..52de29b 100644 --- a/avro_derive/src/lib.rs +++ b/avro_derive/src/lib.rs @@ -118,11 +118,11 @@ fn create_trait_definition( quote! 
{ #[automatically_derived] impl #impl_generics ::apache_avro::AvroSchemaComponent for #ident #ty_generics #where_clause { - fn get_schema_in_ctxt(named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::apache_avro::schema::Schema { + fn get_schema_in_ctxt(named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: ::apache_avro::schema::NamespaceRef) -> ::apache_avro::schema::Schema { #get_schema_impl } - fn get_record_fields_in_ctxt(named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: &::std::option::Option<::std::string::String>) -> ::std::option::Option<::std::vec::Vec<::apache_avro::schema::RecordField>> { + fn get_record_fields_in_ctxt(named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, enclosing_namespace: ::apache_avro::schema::NamespaceRef) -> ::std::option::Option<::std::vec::Vec<::apache_avro::schema::RecordField>> { #get_record_fields_impl } @@ -136,11 +136,11 @@ fn create_trait_definition( /// Generate the code to check `named_schemas` if this schema already exist fn handle_named_schemas(full_schema_name: String, schema_def: TokenStream) -> TokenStream { quote! 
{ - let name = ::apache_avro::schema::Name::new(#full_schema_name).expect(concat!("Unable to parse schema name ", #full_schema_name)).fully_qualified_name(enclosing_namespace); + let name = ::apache_avro::schema::Name::new_with_enclosing_namespace(#full_schema_name, enclosing_namespace).expect(concat!("Unable to parse schema name ", #full_schema_name)); if named_schemas.contains(&name) { ::apache_avro::schema::Schema::Ref{name} } else { - let enclosing_namespace = &name.namespace; + let enclosing_namespace = name.namespace(); named_schemas.insert(name.clone()); #schema_def } @@ -597,15 +597,14 @@ mod tests { impl ::apache_avro::AvroSchemaComponent for Basic { fn get_schema_in_ctxt( named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, - enclosing_namespace: &::std::option::Option<::std::string::String> + enclosing_namespace: ::apache_avro::schema::NamespaceRef ) -> ::apache_avro::schema::Schema { - let name = ::apache_avro::schema::Name::new("Basic") - .expect(concat!("Unable to parse schema name ", "Basic")) - .fully_qualified_name(enclosing_namespace); + let name = ::apache_avro::schema::Name::new_with_enclosing_namespace("Basic", enclosing_namespace) + .expect(concat!("Unable to parse schema name ", "Basic")); if named_schemas.contains(&name) { ::apache_avro::schema::Schema::Ref { name } } else { - let enclosing_namespace = &name.namespace; + let enclosing_namespace = name.namespace(); named_schemas.insert(name.clone()); ::apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { name: ::apache_avro::schema::Name::new("Basic").expect( @@ -627,7 +626,7 @@ mod tests { fn get_record_fields_in_ctxt( named_schemas: &mut ::std::collections::HashSet<::apache_avro::schema::Name>, - enclosing_namespace: &::std::option::Option<::std::string::String> + enclosing_namespace: ::apache_avro::schema::NamespaceRef ) -> ::std::option::Option <::std::vec::Vec<::apache_avro::schema::RecordField>> { None } @@ -761,7 +760,7 @@ mod tests { match 
syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qual [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : :: apache_avro :: schema :: NamespaceRef) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new_with_enclosing_namespace ("A" , enclosing_namespace) . expect (concat ! ("Unable to parse schema [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -780,7 +779,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_enum) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qual [...] 
+ let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : :: apache_avro :: schema :: NamespaceRef) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new_with_enclosing_namespace ("A" , enclosing_namespace) . expect (concat ! ("Unable to parse schema [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -803,7 +802,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qual [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : :: apache_avro :: schema :: NamespaceRef) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new_with_enclosing_namespace ("A" , enclosing_namespace) . expect (concat ! ("Unable to parse schema [...] 
let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -823,7 +822,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_enum) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for B { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new ("B") . expect (concat ! ("Unable to parse schema name " , "B")) . fully_qual [...] + let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for B { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : :: apache_avro :: schema :: NamespaceRef) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new_with_enclosing_namespace ("B" , enclosing_namespace) . expect (concat ! ("Unable to parse schema [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } @@ -847,7 +846,7 @@ mod tests { match syn::parse2::<DeriveInput>(test_struct) { Ok(input) => { let schema_res = derive_avro_schema(input); - let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : & :: std :: option :: Option < :: std :: string :: String >) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new ("A") . expect (concat ! ("Unable to parse schema name " , "A")) . fully_qual [...] 
+ let expected_token_stream = r#"# [automatically_derived] impl :: apache_avro :: AvroSchemaComponent for A { fn get_schema_in_ctxt (named_schemas : & mut :: std :: collections :: HashSet < :: apache_avro :: schema :: Name > , enclosing_namespace : :: apache_avro :: schema :: NamespaceRef) -> :: apache_avro :: schema :: Schema { let name = :: apache_avro :: schema :: Name :: new_with_enclosing_namespace ("A" , enclosing_namespace) . expect (concat ! ("Unable to parse schema [...] let schema_token_stream = schema_res.unwrap().to_string(); assert_eq!(schema_token_stream, expected_token_stream); } diff --git a/avro_derive/tests/derive.rs b/avro_derive/tests/derive.rs index b6855b8..ae366e3 100644 --- a/avro_derive/tests/derive.rs +++ b/avro_derive/tests/derive.rs @@ -17,7 +17,7 @@ use apache_avro::{ AvroSchema, AvroSchemaComponent, Reader, Schema, Writer, from_value, - schema::{Alias, EnumSchema, FixedSchema, Name, Namespace, RecordSchema}, + schema::{Alias, EnumSchema, FixedSchema, Name, RecordSchema}, }; use proptest::prelude::*; use serde::{Deserialize, Serialize, de::DeserializeOwned}; @@ -29,6 +29,7 @@ use std::{ }; use uuid::Uuid; +use apache_avro::schema::NamespaceRef; use pretty_assertions::assert_eq; /// Takes in a type that implements the right combination of traits and runs it through a Serde Cycle and asserts the result is the same @@ -141,7 +142,7 @@ fn test_basic_namespace() { let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestBasicNamespace::get_schema()); if let Schema::Record(RecordSchema { name, .. 
}) = TestBasicNamespace::get_schema() { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()) + assert_eq!(Some("com.testing.namespace"), name.namespace()) } else { panic!("TestBasicNamespace schema must be a record schema") } @@ -188,20 +189,14 @@ fn test_complex_namespace() { let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestComplexNamespace::get_schema()); if let Schema::Record(RecordSchema { name, fields, .. }) = TestComplexNamespace::get_schema() { - assert_eq!( - "com.testing.complex.namespace".to_owned(), - name.namespace.unwrap() - ); + assert_eq!(Some("com.testing.complex.namespace"), name.namespace()); let inner_schema = fields .iter() .filter(|field| field.name == "a") .map(|field| &field.schema) .next(); if let Some(Schema::Record(RecordSchema { name, .. })) = inner_schema { - assert_eq!( - "com.testing.namespace".to_owned(), - name.namespace.clone().unwrap() - ) + assert_eq!(Some("com.testing.namespace"), name.namespace()) } else { panic!("Field 'a' must have a record schema") } @@ -239,8 +234,8 @@ fn avro_rs_239_test_named_record() { let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestNamedRecord::get_schema()); if let Schema::Record(RecordSchema { name, .. }) = TestNamedRecord::get_schema() { - assert_eq!("Other", name.name.as_str()); - assert_eq!(Some("com.testing.namespace"), name.namespace.as_deref()) + assert_eq!("Other", name.name()); + assert_eq!(Some("com.testing.namespace"), name.namespace()) } else { panic!("TestNamedRecord schema must be a record schema") } @@ -1024,7 +1019,7 @@ fn test_basic_with_attributes() { "#; let schema = Schema::parse_str(schema).unwrap(); if let Schema::Record(RecordSchema { name, doc, .. 
}) = TestBasicWithAttributes::get_schema() { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); + assert_eq!(Some("com.testing.namespace"), name.namespace()); assert_eq!("A Documented Record", doc.unwrap()) } else { panic!("TestBasicWithAttributes schema must be a record schema") @@ -1067,7 +1062,7 @@ fn test_basic_with_out_doc_attributes() { let derived_schema = TestBasicWithOuterDocAttributes::get_schema(); assert_eq!(&schema, &derived_schema); if let Schema::Record(RecordSchema { name, doc, .. }) = derived_schema { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); + assert_eq!(Some("com.testing.namespace"), name.namespace()); assert_eq!("A Documented Record", doc.unwrap()) } else { panic!("TestBasicWithOuterDocAttributes schema must be a record schema") @@ -1109,7 +1104,7 @@ fn test_basic_with_large_doc() { "#; let schema = Schema::parse_str(schema).unwrap(); if let Schema::Record(RecordSchema { name, doc, .. }) = TestBasicWithLargeDoc::get_schema() { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); + assert_eq!(Some("com.testing.namespace"), name.namespace()); assert_eq!( "A Documented Record\nthat spans\nmultiple lines", doc.unwrap() @@ -1860,15 +1855,19 @@ fn avro_rs_397_with() { ) .unwrap(); - fn long_schema(_named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace) -> Schema { + fn long_schema( + _named_schemas: &mut HashSet<Name>, + _enclosing_namespace: NamespaceRef, + ) -> Schema { Schema::Long } mod module { use super::*; + use apache_avro::schema::NamespaceRef; pub fn get_schema_in_ctxt( _named_schemas: &mut HashSet<Name>, - _enclosing_namespace: &Namespace, + _enclosing_namespace: NamespaceRef, ) -> Schema { Schema::Bytes } @@ -1913,10 +1912,10 @@ fn avro_rs_397_with_generic() { fn generic<const N: usize>( _named_schemas: &mut HashSet<Name>, - _enclosing_namespace: &Namespace, + _enclosing_namespace: NamespaceRef, ) -> Schema { Schema::Fixed(FixedSchema { - name: 
Name::new(&format!("fixed_{N}")).unwrap(), + name: Name::new(format!("fixed_{N}")).unwrap(), aliases: None, doc: None, size: N, @@ -2004,7 +2003,10 @@ fn avro_rs_397_derive_with_expr_lambda() { #[test] fn avro_rs_398_transparent_with_skip() { - fn long_schema(_named_schemas: &mut HashSet<Name>, _enclosing_namespace: &Namespace) -> Schema { + fn long_schema( + _named_schemas: &mut HashSet<Name>, + _enclosing_namespace: NamespaceRef, + ) -> Schema { Schema::Long } @@ -2310,7 +2312,7 @@ fn avro_rs_448_transparent_with() { let mut named_schemas = HashSet::new(); assert_eq!( - TestStruct::get_record_fields_in_ctxt(&mut named_schemas, &None), + TestStruct::get_record_fields_in_ctxt(&mut named_schemas, None), None ); assert!( @@ -2335,21 +2337,21 @@ fn avro_rs_448_transparent_with_2() { } let mut named_schemas = HashSet::new(); - let fields = TestStruct::get_record_fields_in_ctxt(&mut named_schemas, &None).unwrap(); + let fields = TestStruct::get_record_fields_in_ctxt(&mut named_schemas, None).unwrap(); assert!( named_schemas.is_empty(), "No name should've been added: {named_schemas:?}" ); assert_eq!(fields.len(), 2); - TestStruct::get_schema_in_ctxt(&mut named_schemas, &None); + TestStruct::get_schema_in_ctxt(&mut named_schemas, None); assert_eq!( named_schemas.len(), 1, "One name should've been added: {named_schemas:?}" ); - let fields = TestStruct::get_record_fields_in_ctxt(&mut named_schemas, &None).unwrap(); + let fields = TestStruct::get_record_fields_in_ctxt(&mut named_schemas, None).unwrap(); assert_eq!( named_schemas.len(), 1,
