This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch less-cloning-resolved-schema in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit fa31905519448ef3903ebadf42f699ed18c1d1d6 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Tue Jan 27 09:52:15 2026 +0200 chore: Less cloning while dealing with ResolvedSchema Return reference for get_schemata() Use internal mutation for ResolvedSchema::names_ref --- avro/src/decode.rs | 2 +- avro/src/encode.rs | 2 +- avro/src/reader.rs | 2 +- avro/src/schema/resolve.rs | 35 +++++++++++++++++++---------------- avro/src/schema/union.rs | 4 ++-- avro/src/serde/ser_schema.rs | 12 ++++++------ avro/src/types.rs | 4 ++-- avro/src/writer.rs | 17 +++++++---------- 8 files changed, 39 insertions(+), 39 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index 1adc095..ccb9d48 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -74,7 +74,7 @@ fn decode_seq_len<R: Read>(reader: &mut R) -> AvroResult<usize> { /// Decode a `Value` from avro format given its `Schema`. pub fn decode<R: Read>(schema: &Schema, reader: &mut R) -> AvroResult<Value> { let rs = ResolvedSchema::try_from(schema)?; - decode_internal(schema, rs.get_names(), &None, reader) + decode_internal(schema, &rs.get_names(), &None, reader) } pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( diff --git a/avro/src/encode.rs b/avro/src/encode.rs index 85c4461..33384b9 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -37,7 +37,7 @@ use std::{borrow::Borrow, collections::HashMap, io::Write}; /// encoding for complex type values. pub fn encode<W: Write>(value: &Value, schema: &Schema, writer: &mut W) -> AvroResult<usize> { let rs = ResolvedSchema::try_from(schema)?; - encode_internal(value, schema, rs.get_names(), &None, writer) + encode_internal(value, schema, &rs.get_names(), &None, writer) } /// Encode `s` as the _bytes_ primitive type. 
diff --git a/avro/src/reader.rs b/avro/src/reader.rs index 967edb5..aafc263 100644 --- a/avro/src/reader.rs +++ b/avro/src/reader.rs @@ -495,7 +495,7 @@ pub fn from_avro_datum_reader_schemata<R: Read>( reader_schemata: Vec<&Schema>, ) -> AvroResult<Value> { let rs = ResolvedSchema::try_from(writer_schemata)?; - let value = decode_internal(writer_schema, rs.get_names(), &None, reader)?; + let value = decode_internal(writer_schema, &rs.get_names(), &None, reader)?; match reader_schema { Some(schema) => { if reader_schemata.is_empty() { diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs index e82b87d..31b3754 100644 --- a/avro/src/schema/resolve.rs +++ b/avro/src/schema/resolve.rs @@ -22,21 +22,22 @@ use crate::schema::{ }; use crate::{AvroResult, Error, Schema}; use std::borrow::Borrow; +use std::cell::{Ref, RefCell}; use std::collections::HashMap; #[derive(Debug)] pub struct ResolvedSchema<'s> { - pub(super) names_ref: NamesRef<'s>, + names_ref: RefCell<NamesRef<'s>>, schemata: Vec<&'s Schema>, } impl<'s> ResolvedSchema<'s> { - pub fn get_schemata(&self) -> Vec<&'s Schema> { - self.schemata.clone() + pub fn get_schemata(&self) -> &[&'s Schema] { + &self.schemata } - pub fn get_names(&self) -> &NamesRef<'s> { - &self.names_ref + pub fn get_names(&self) -> Ref<'_, NamesRef<'s>> { + self.names_ref.borrow() } /// Resolve all references in this schema. @@ -52,8 +53,8 @@ impl<'s> ResolvedSchema<'s> { /// These schemas will be resolved in order, so references to schemas later in the /// list is not supported. 
pub fn new_with_schemata(schemata: Vec<&'s Schema>) -> AvroResult<Self> { - let mut rs = ResolvedSchema { - names_ref: HashMap::new(), + let rs = ResolvedSchema { + names_ref: RefCell::new(HashMap::new()), schemata, }; rs.resolve(rs.get_schemata(), &None, None)?; @@ -69,8 +70,8 @@ impl<'s> ResolvedSchema<'s> { known_schemata: &'n NamesRef<'n>, ) -> AvroResult<Self> { let names = HashMap::new(); - let mut rs = ResolvedSchema { - names_ref: names, + let rs = ResolvedSchema { + names_ref: RefCell::new(names), schemata: schemata_to_resolve, }; rs.resolve(rs.get_schemata(), enclosing_namespace, Some(known_schemata))?; @@ -78,22 +79,22 @@ impl<'s> ResolvedSchema<'s> { } fn resolve<'n>( - &mut self, - schemata: Vec<&'s Schema>, + &self, + schemata: &[&'s Schema], enclosing_namespace: &Namespace, known_schemata: Option<&'n NamesRef<'n>>, ) -> AvroResult<()> { for schema in schemata { match schema { Schema::Array(schema) => { - self.resolve(vec![&schema.items], enclosing_namespace, known_schemata)? + self.resolve(&[&schema.items], enclosing_namespace, known_schemata)? } Schema::Map(schema) => { - self.resolve(vec![&schema.types], enclosing_namespace, known_schemata)? + self.resolve(&[&schema.types], enclosing_namespace, known_schemata)? } Schema::Union(UnionSchema { schemas, .. }) => { for schema in schemas { - self.resolve(vec![schema], enclosing_namespace, known_schemata)? + self.resolve(&[schema], enclosing_namespace, known_schemata)? } } Schema::Enum(EnumSchema { name, .. 
}) @@ -107,6 +108,7 @@ impl<'s> ResolvedSchema<'s> { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if self .names_ref + .borrow_mut() .insert(fully_qualified_name.clone(), schema) .is_some() { @@ -117,6 +119,7 @@ impl<'s> ResolvedSchema<'s> { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if self .names_ref + .borrow_mut() .insert(fully_qualified_name.clone(), schema) .is_some() { @@ -124,14 +127,14 @@ impl<'s> ResolvedSchema<'s> { } else { let record_namespace = fully_qualified_name.namespace; for field in fields { - self.resolve(vec![&field.schema], &record_namespace, known_schemata)? + self.resolve(&[&field.schema], &record_namespace, known_schemata)? } } } Schema::Ref { name } => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); // first search for reference in current schemata, then look into external references. - if !self.names_ref.contains_key(&fully_qualified_name) { + if !self.get_names().contains_key(&fully_qualified_name) { let is_resolved_with_known_schemas = known_schemata .as_ref() .map(|names| names.contains_key(&fully_qualified_name)) diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs index 5bf631a..c9a5c5b 100644 --- a/avro/src/schema/union.rs +++ b/avro/src/schema/union.rs @@ -108,10 +108,10 @@ impl UnionSchema { &collected_names, ) .expect("Schema didn't successfully parse"); - let resolved_names = resolved_schema.names_ref; + let resolved_names = resolved_schema.get_names(); // extend known schemas with just resolved names - collected_names.extend(resolved_names); + collected_names.extend(resolved_names.clone()); let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone()); value diff --git a/avro/src/serde/ser_schema.rs b/avro/src/serde/ser_schema.rs index 2cfe380..df22bd0 100644 --- a/avro/src/serde/ser_schema.rs +++ b/avro/src/serde/ser_schema.rs @@ -3031,8 +3031,8 @@ mod tests { assert!(!crate::util::is_human_readable()); let mut 
buffer: Vec<u8> = Vec::new(); let rs = ResolvedSchema::try_from(&schema)?; - let mut serializer = - SchemaAwareWriteSerializer::new(&mut buffer, &schema, rs.get_names(), None); + let names = rs.get_names(); + let mut serializer = SchemaAwareWriteSerializer::new(&mut buffer, &schema, &names, None); let good_record = TestRecord { string_field: String::from("test"), @@ -3112,8 +3112,8 @@ mod tests { let mut buffer: Vec<u8> = Vec::new(); let rs = ResolvedSchema::try_from(&schema)?; - let mut serializer = - SchemaAwareWriteSerializer::new(&mut buffer, &schema, rs.get_names(), None); + let names = rs.get_names(); + let mut serializer = SchemaAwareWriteSerializer::new(&mut buffer, &schema, &names, None); let foo_record = TestRecord { inner_union: InnerUnion::InnerVariantFoo(InnerRecordFoo { @@ -3189,8 +3189,8 @@ mod tests { let mut buffer: Vec<u8> = Vec::new(); let rs = ResolvedSchema::try_from(&schema)?; - let mut serializer = - SchemaAwareWriteSerializer::new(&mut buffer, &schema, rs.get_names(), None); + let names = rs.get_names(); + let mut serializer = SchemaAwareWriteSerializer::new(&mut buffer, &schema, &names, None); let null_record = TestRecord { inner_union: None }; null_record.serialize(&mut serializer)?; diff --git a/avro/src/types.rs b/avro/src/types.rs index 0060987..658473a 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -372,7 +372,7 @@ impl Value { schemata.iter().any(|schema| { let enclosing_namespace = schema.namespace(); - match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) { + match self.validate_internal(schema, &rs.get_names(), &enclosing_namespace) { Some(reason) => { let log_message = format!("Invalid value: {self:?} for schema: {schema:?}. Reason: {reason}"); @@ -667,7 +667,7 @@ impl Value { } else { ResolvedSchema::try_from(schemata)? 
}; - self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None) + self.resolve_internal(schema, &rs.get_names(), &enclosing_namespace, &None) } pub(crate) fn resolve_internal<S: Borrow<Schema> + Debug>( diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 72f87d0..b1c61df 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -218,15 +218,12 @@ impl<'a, W: Write> Writer<'a, W> { /// written, then call [`flush`](Writer::flush). pub fn append_ser<S: Serialize>(&mut self, value: S) -> AvroResult<usize> { let n = self.maybe_write_header()?; - - let mut serializer = SchemaAwareWriteSerializer::new( - &mut self.buffer, - self.schema, - self.resolved_schema.get_names(), - None, - ); + let names = self.resolved_schema.get_names(); + let mut serializer = + SchemaAwareWriteSerializer::new(&mut self.buffer, self.schema, &names, None); value.serialize(&mut serializer)?; self.num_values += 1; + drop(names); if self.buffer.len() >= self.block_size { return self.flush().map(|b| b + n); @@ -525,7 +522,7 @@ fn write_avro_datum_schemata<T: Into<Value>>( ) -> AvroResult<usize> { let avro = value.into(); let rs = ResolvedSchema::try_from(schemata)?; - let names = rs.get_names(); + let names = &rs.get_names(); let enclosing_namespace = schema.namespace(); if let Some(_err) = avro.validate_internal(schema, names, &enclosing_namespace) { return Err(Details::Validation.into()); @@ -665,7 +662,7 @@ fn write_value_ref_resolved( value: &Value, buffer: &mut Vec<u8>, ) -> AvroResult<usize> { - match value.validate_internal(schema, resolved_schema.get_names(), &schema.namespace()) { + match value.validate_internal(schema, &resolved_schema.get_names(), &schema.namespace()) { Some(reason) => Err(Details::ValidationWithReason { value: value.clone(), schema: schema.clone(), @@ -675,7 +672,7 @@ fn write_value_ref_resolved( None => encode_internal( value, schema, - resolved_schema.get_names(), + &resolved_schema.get_names(), &schema.namespace(), buffer, ),
