Kriskras99 commented on code in PR #496: URL: https://github.com/apache/avro-rs/pull/496#discussion_r2888890069
########## avro/src/reader/datum.rs: ########## @@ -0,0 +1,351 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Read; + +use bon::bon; + +use crate::{AvroResult, Schema, decode::decode_internal, schema::ResolvedSchema, types::Value}; + +/// Reader for reading raw Avro data. +/// +/// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], +/// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or +/// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. +pub struct GenericDatumReader<'s> { + writer: &'s Schema, + resolved: ResolvedSchema<'s>, + reader: Option<(&'s Schema, ResolvedSchema<'s>)>, +} + +#[bon] +impl<'s> GenericDatumReader<'s> { + /// Build a [`DatumReader`]. + /// + /// This is most likely not what you need. Most users should use [`Reader`][crate::Reader], + /// [`GenericSingleObjectReader`][crate::GenericSingleObjectReader], or + /// [`SpecificSingleObjectReader`][crate::SpecificSingleObjectReader] instead. + #[builder] + pub fn new( + /// The schema that was used to write the Avro datum. + #[builder(start_fn)] + writer_schema: &'s Schema, + /// Already resolved schemata that will be used to resolve references in the writer's schema. + resolved_writer_schemata: Option<ResolvedSchema<'s>>, + /// The schema that will be used to resolve the value to conform the the new schema. + reader_schema: Option<&'s Schema>, + /// Already resolved schemata that will be used to resolve references in the reader's schema. + resolved_reader_schemata: Option<ResolvedSchema<'s>>, + ) -> AvroResult<Self> { + let resolved_writer_schemata = if let Some(resolved) = resolved_writer_schemata { + resolved + } else { + ResolvedSchema::try_from(writer_schema)? + }; + + let reader = if let Some(reader) = reader_schema { + if let Some(resolved) = resolved_reader_schemata { + Some((reader, resolved)) + } else { + Some((reader, ResolvedSchema::try_from(reader)?)) + } + } else { + None + }; + + Ok(Self { + writer: writer_schema, + resolved: resolved_writer_schemata, + reader, + }) + } +} + +impl<'s, S: generic_datum_reader_builder::State> GenericDatumReaderBuilder<'s, S> { + /// Set the schemata that will be used to resolve any references in the writer's schema. + /// + /// This is equivalent to `.resolved_writer_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + pub fn writer_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedWriterSchemata<S>>, + > + where + S::ResolvedWriterSchemata: generic_datum_reader_builder::IsUnset, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_writer_schemata(resolved)) + } + + /// Set the schemata that will be used to resolve any references in the reader's schema. + /// + /// This is equivalent to `.resolved_reader_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + /// + /// This function can only be called after the reader schema is set. + pub fn reader_schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumReaderBuilder<'s, generic_datum_reader_builder::SetResolvedReaderSchemata<S>>, + > + where + S::ResolvedReaderSchemata: generic_datum_reader_builder::IsUnset, + S::ReaderSchema: generic_datum_reader_builder::IsSet, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_reader_schemata(resolved)) + } +} + +impl<'s> GenericDatumReader<'s> { + /// Read a Avro datum from the reader. + pub fn read_value<R: Read>(&self, reader: &mut R) -> AvroResult<Value> { + let value = decode_internal(self.writer, self.resolved.get_names(), None, reader)?; + if let Some((reader, resolved)) = &self.reader { + value.resolve_internal(reader, resolved.get_names(), None, &None) + } else { + Ok(value) + } + } +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` +/// to read from. +/// +/// In case a reader `Schema` is provided, schema resolution will also be performed. +/// +/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the +/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what +/// you are doing, instead. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum<R: Read>( + writer_schema: &Schema, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_schemata<R: Read>( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .build()? + .read_value(reader) +} + +/// Deprecated. +/// +/// This is equivalent to +/// ```ignore +/// GenericDatumReader::builder(writer_schema) +/// .writer_schemata(writer_schemata)? +/// .maybe_reader_schema(reader_schema) +/// .reader_schemata(reader_schemata)? +/// .build()? +/// .read_value(reader) +/// ``` +/// +/// Decode a `Value` from raw Avro data. +/// +/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// When a reader `Schema` is provided, schema resolution will also be performed. +#[deprecated(since = "0.22.0", note = "Use `DatumReader` instead")] +pub fn from_avro_datum_reader_schemata<R: Read>( + writer_schema: &Schema, + writer_schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, + reader_schemata: Vec<&Schema>, +) -> AvroResult<Value> { + GenericDatumReader::builder(writer_schema) + .writer_schemata(writer_schemata)? + .maybe_reader_schema(reader_schema) + .reader_schemata(reader_schemata)? Review Comment: It's not a problem, the value will just be ignored. I put the extra bound on the `reader_schemata` function just to make it easier for users to do the right thing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
