This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/full_enum_support in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit b430c7b37cc963fa4d82ff6ff80af42a92ccb0ca Author: Kriskras99 <[email protected]> AuthorDate: Fri Mar 20 08:29:47 2026 +0100 wip --- avro/src/bigdecimal.rs | 33 +++---- avro/src/error.rs | 6 +- avro/src/reader/block.rs | 20 ++-- avro/src/reader/datum.rs | 19 ++-- avro/src/reader/mod.rs | 16 ++-- avro/src/reader/single_object.rs | 33 ++++--- avro/src/schema/record/field.rs | 30 +++--- avro/src/schema/union.rs | 35 ++++--- avro/src/serde/derive.rs | 25 ++--- avro/src/serde/deser_schema/array.rs | 15 +-- avro/src/serde/deser_schema/enums.rs | 21 ++-- avro/src/serde/deser_schema/identifier.rs | 7 +- avro/src/serde/deser_schema/map.rs | 15 +-- avro/src/serde/deser_schema/mod.rs | 70 +++++++------- avro/src/serde/deser_schema/record.rs | 14 +-- avro/src/serde/deser_schema/tuple.rs | 13 ++- avro/src/serde/ser_schema/block.rs | 23 +++-- avro/src/serde/ser_schema/mod.rs | 72 +++++++++----- avro/src/serde/ser_schema/record/field_default.rs | 9 +- avro/src/serde/ser_schema/record/mod.rs | 111 ++++++++++++++++------ avro/src/serde/ser_schema/tuple.rs | 19 ++-- avro/src/serde/ser_schema/union.rs | 29 +++--- avro/src/serde/with.rs | 55 ++++++----- avro/src/writer/datum.rs | 5 +- avro/src/writer/mod.rs | 13 +-- avro/src/writer/single_object.rs | 12 +-- avro/tests/avro-rs-226.rs | 25 +++-- avro/tests/avro-rs-285-bytes_deserialization.rs | 17 ++-- avro/tests/schema.rs | 22 ++--- avro/tests/union_schema.rs | 82 +++++++--------- avro_derive/tests/derive.rs | 46 ++++----- avro_derive/tests/serde.rs | 20 ++-- 32 files changed, 528 insertions(+), 404 deletions(-) diff --git a/avro/src/bigdecimal.rs b/avro/src/bigdecimal.rs index ee26ee1..cd2c736 100644 --- a/avro/src/bigdecimal.rs +++ b/avro/src/bigdecimal.rs @@ -69,11 +69,6 @@ pub(crate) fn deserialize_big_decimal(mut bytes: &[u8]) -> AvroResult<BigDecimal #[cfg(test)] mod tests { - use super::*; - use crate::{Codec, Reader, Schema, Writer, error::Error, from_value, types::Record}; - use apache_avro_test_helper::TestResult; - use bigdecimal::{One, Zero}; - use pretty_assertions::assert_eq; use std::{ fs::File, io::BufReader, @@ -81,6 +76,16 @@ mod tests { str::FromStr, }; + use apache_avro_test_helper::TestResult; + use bigdecimal::{One, Zero}; + use pretty_assertions::assert_eq; + + use super::*; + use crate::{ + Codec, Reader, Schema, Writer, error::Error, reader::datum::GenericDatumReader, + types::Record, writer::datum::GenericDatumWriter, + }; + #[test] fn test_avro_3779_bigdecimal_serial() -> TestResult { let value: BigDecimal = @@ -206,18 +211,14 @@ mod tests { let test = Test::default(); - // write a record - let mut writer = Writer::new(&schema, Vec::new())?; - writer.append_ser(test.clone())?; - - let wrote_data = writer.into_inner()?; - - // read record - let mut reader = Reader::new(&wrote_data[..])?; - - let value = reader.next().unwrap()?; + let serialized = GenericDatumWriter::builder(&schema) + .build()? + .write_ser_to_vec(&test)?; + let value: Test = GenericDatumReader::builder(&schema) + .build()? + .read_deser(&mut &serialized[..])?; - assert_eq!(test, from_value::<Test>(&value)?); + assert_eq!(value, test); Ok(()) } diff --git a/avro/src/error.rs b/avro/src/error.rs index e4c09cd..c528739 100644 --- a/avro/src/error.rs +++ b/avro/src/error.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::RecordSchema; +use std::{error::Error as _, fmt}; + use crate::{ - schema::{Name, Schema, SchemaKind, UnionSchema}, + schema::{Name, RecordSchema, Schema, SchemaKind, UnionSchema}, types::{Value, ValueKind}, }; -use std::{error::Error as _, fmt}; /// Errors encountered by Avro. /// diff --git a/avro/src/reader/block.rs b/avro/src/reader/block.rs index 1d80871..73daaf8 100644 --- a/avro/src/reader/block.rs +++ b/avro/src/reader/block.rs @@ -15,23 +15,25 @@ // specific language governing permissions and limitations // under the License. -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; +use std::{ + collections::HashMap, + io::{ErrorKind, Read}, + str::FromStr, +}; + +use log::warn; +use serde::de::DeserializeOwned; +use serde_json::from_slice; + use crate::{ AvroResult, Codec, Error, decode::{decode, decode_internal}, error::Details, schema::{Names, Schema, resolve_names, resolve_names_with_schemata}, + serde::deser_schema::{Config, SchemaAwareDeserializer}, types::Value, util, }; -use log::warn; -use serde::de::DeserializeOwned; -use serde_json::from_slice; -use std::{ - collections::HashMap, - io::{ErrorKind, Read}, - str::FromStr, -}; /// Internal Block reader. #[derive(Debug, Clone)] diff --git a/avro/src/reader/datum.rs b/avro/src/reader/datum.rs index 6f5906f..b02f8db 100644 --- a/avro/src/reader/datum.rs +++ b/avro/src/reader/datum.rs @@ -15,16 +15,19 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::ResolvedOwnedSchema; -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; -use crate::util::is_human_readable; -use crate::{ - AvroResult, AvroSchema, Schema, decode::decode_internal, schema::ResolvedSchema, types::Value, -}; +use std::{io::Read, marker::PhantomData}; + use bon::bon; use serde::de::DeserializeOwned; -use std::io::Read; -use std::marker::PhantomData; + +use crate::{ + AvroResult, AvroSchema, Schema, + decode::decode_internal, + schema::{ResolvedOwnedSchema, ResolvedSchema}, + serde::deser_schema::{Config, SchemaAwareDeserializer}, + types::Value, + util::is_human_readable, +}; /// Reader for reading raw Avro data. /// diff --git a/avro/src/reader/mod.rs b/avro/src/reader/mod.rs index 072f2df..d439407 100644 --- a/avro/src/reader/mod.rs +++ b/avro/src/reader/mod.rs @@ -21,13 +21,13 @@ mod block; pub mod datum; pub mod single_object; -use crate::util::is_human_readable; -use crate::{AvroResult, schema::Schema, types::Value}; +use std::{collections::HashMap, io::Read, marker::PhantomData}; + use block::Block; use bon::bon; use serde::de::DeserializeOwned; -use std::marker::PhantomData; -use std::{collections::HashMap, io::Read}; + +use crate::{AvroResult, schema::Schema, types::Value, util::is_human_readable}; /// Main interface for reading Avro formatted values. /// @@ -186,11 +186,13 @@ pub fn read_marker(bytes: &[u8]) -> [u8; 16] { #[cfg(test)] mod tests { - use super::*; - use crate::types::Record; + use std::io::Cursor; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; - use std::io::Cursor; + + use super::*; + use crate::types::Record; const SCHEMA: &str = r#" { diff --git a/avro/src/reader/single_object.rs b/avro/src/reader/single_object.rs index 47e002c..a26b513 100644 --- a/avro/src/reader/single_object.rs +++ b/avro/src/reader/single_object.rs @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. -use crate::decode::decode_internal; -use crate::error::Details; -use crate::headers::{HeaderBuilder, RabinFingerprintHeader}; -use crate::schema::ResolvedOwnedSchema; -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; -use crate::types::Value; -use crate::util::is_human_readable; -use crate::{AvroResult, AvroSchema, Schema}; +use std::{io::Read, marker::PhantomData}; + use bon::bon; use serde::de::DeserializeOwned; -use std::io::Read; -use std::marker::PhantomData; + +use crate::{ + AvroResult, AvroSchema, Schema, + decode::decode_internal, + error::Details, + headers::{HeaderBuilder, RabinFingerprintHeader}, + schema::ResolvedOwnedSchema, + serde::deser_schema::{Config, SchemaAwareDeserializer}, + types::Value, + util::is_human_readable, +}; pub struct GenericSingleObjectReader { write_schema: ResolvedOwnedSchema, @@ -140,15 +143,15 @@ where #[cfg(test)] mod tests { - use super::*; - use crate::encode::encode; - use crate::headers::GlueSchemaUuidHeader; - use crate::rabin::Rabin; - use crate::{AvroSchema, Error, Schema}; use apache_avro_test_helper::TestResult; use serde::Deserialize; use uuid::Uuid; + use super::*; + use crate::{ + AvroSchema, Error, Schema, encode::encode, headers::GlueSchemaUuidHeader, rabin::Rabin, + }; + #[derive(Deserialize, Clone, PartialEq, Debug)] struct TestSingleObjectReader { a: i64, diff --git a/avro/src/schema/record/field.rs b/avro/src/schema/record/field.rs index 04b2861..f845e6a 100644 --- a/avro/src/schema/record/field.rs +++ b/avro/src/schema/record/field.rs @@ -15,18 +15,23 @@ // specific language governing permissions and limitations // under the License. -use crate::AvroResult; -use crate::error::Details; -use crate::schema::{Documentation, Name, Names, Parser, Schema, SchemaKind}; -use crate::types; -use crate::util::MapHelper; -use crate::validator::validate_record_field_name; +use std::{ + collections::BTreeMap, + fmt::{Debug, Formatter}, +}; + use log::warn; -use serde::ser::SerializeMap; -use serde::{Serialize, Serializer}; +use serde::{Serialize, Serializer, ser::SerializeMap}; use serde_json::{Map, Value}; -use std::collections::BTreeMap; -use std::fmt::{Debug, Formatter}; + +use crate::{ + AvroResult, + error::Details, + schema::{Documentation, Name, Names, Parser, Schema, SchemaKind}, + types, + util::MapHelper, + validator::validate_record_field_name, +}; /// Represents a `field` in a `record` Avro schema. #[derive(bon::Builder, Clone, PartialEq)] @@ -238,11 +243,12 @@ impl Serialize for RecordField { #[cfg(test)] mod tests { - use super::*; - use crate::schema::{Name, Schema, UnionSchema}; use apache_avro_test_helper::TestResult; use serde_json::json; + use super::*; + use crate::schema::{Name, Schema, UnionSchema}; + #[test] fn test_avro_3621_nullable_record_field() -> TestResult { let nullable_record_field = RecordField::builder() diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs index 36b7a93..eae02dc 100644 --- a/avro/src/schema/union.rs +++ b/avro/src/schema/union.rs @@ -15,18 +15,24 @@ // specific language governing permissions and limitations // under the License. -use crate::error::Details; -use crate::schema::{ - DecimalSchema, FixedSchema, InnerDecimalSchema, Name, NamespaceRef, RecordSchema, Schema, - SchemaKind, UuidSchema, +use std::{ + borrow::Borrow, + collections::{BTreeMap, HashMap}, + fmt::{Debug, Formatter}, }; -use crate::types; -use crate::{AvroResult, Error}; -use std::borrow::Borrow; -use std::collections::{BTreeMap, HashMap}; -use std::fmt::{Debug, Formatter}; + use strum::IntoDiscriminant; +use crate::{ + AvroResult, Error, + error::Details, + schema::{ + DecimalSchema, FixedSchema, InnerDecimalSchema, Name, NamespaceRef, RecordSchema, Schema, + SchemaKind, UuidSchema, + }, + types, +}; + /// A description of a Union schema #[derive(Clone)] pub struct UnionSchema { @@ -498,12 +504,15 @@ fn schema_to_base_schemakind(schema: &Schema) -> SchemaKind { #[cfg(test)] mod tests { - use super::*; - use crate::error::{Details, Error}; - use crate::schema::RecordSchema; - use crate::types::Value; use apache_avro_test_helper::TestResult; + use super::*; + use crate::{ + error::{Details, Error}, + schema::RecordSchema, + types::Value, + }; + #[test] fn avro_rs_402_new_union_schema() -> TestResult { let schema1 = Schema::Int; diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index 255da4b..4ab1f95 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. -use crate::Schema; -use crate::schema::{ - FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema, +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, +}; + +use crate::{ + Schema, + schema::{FixedSchema, Name, NamespaceRef, RecordField, RecordSchema, UnionSchema, UuidSchema}, }; -use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; /// Trait for types that serve as an Avro data model. /// @@ -960,14 +963,14 @@ tuple_impls! { #[cfg(test)] mod tests { - use crate::reader::datum::GenericDatumReader; - use crate::writer::datum::GenericDatumWriter; + use apache_avro_test_helper::TestResult; + use crate::{ AvroSchema, Schema, + reader::datum::GenericDatumReader, schema::{FixedSchema, Name}, + writer::datum::GenericDatumWriter, }; - use apache_avro_test_helper::TestResult; - use std::io::Cursor; #[test] fn avro_rs_401_str() -> TestResult { @@ -1111,9 +1114,9 @@ mod tests { let written_max = writer.write_ser_to_vec(&max)?; let reader = GenericDatumReader::builder(&schema).build()?; - let read_zero = reader.read_deser(&mut Cursor::new(written_zero))?; + let read_zero = reader.read_deser(&mut &written_zero[..])?; assert_eq!(zero, read_zero); - let read_max = reader.read_deser(&mut Cursor::new(written_max))?; + let read_max = reader.read_deser(&mut &written_max[..])?; assert_eq!(max, read_max); Ok(()) } diff --git a/avro/src/serde/deser_schema/array.rs b/avro/src/serde/deser_schema/array.rs index c1da742..64a2885 100644 --- a/avro/src/serde/deser_schema/array.rs +++ b/avro/src/serde/deser_schema/array.rs @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. -use super::{Config, SchemaAwareDeserializer}; -use crate::schema::ArraySchema; -use crate::util::{zag_i32, zag_i64}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, io::Read}; + use serde::de::{DeserializeSeed, SeqAccess}; -use std::borrow::Borrow; -use std::io::Read; + +use super::{Config, SchemaAwareDeserializer}; +use crate::{ + Error, Schema, + schema::ArraySchema, + util::{zag_i32, zag_i64}, +}; /// Deserialize sequences from an Avro array. pub struct ArrayDeserializer<'s, 'r, R: Read, S: Borrow<Schema>> { diff --git a/avro/src/serde/deser_schema/enums.rs b/avro/src/serde/deser_schema/enums.rs index bb2e7c5..bba48fd 100644 --- a/avro/src/serde/deser_schema/enums.rs +++ b/avro/src/serde/deser_schema/enums.rs @@ -15,15 +15,20 @@ // specific language governing permissions and limitations // under the License. +use std::{borrow::Borrow, io::Read}; + +use serde::{ + Deserializer, + de::{DeserializeSeed, EnumAccess, Unexpected, VariantAccess, Visitor}, +}; + use super::{Config, DESERIALIZE_ANY, SchemaAwareDeserializer, identifier::IdentifierDeserializer}; -use crate::error::Details; -use crate::schema::{EnumSchema, UnionSchema}; -use crate::util::zag_i32; -use crate::{Error, Schema}; -use serde::Deserializer; -use serde::de::{DeserializeSeed, EnumAccess, Unexpected, VariantAccess, Visitor}; -use std::borrow::Borrow; -use std::io::Read; +use crate::{ + Error, Schema, + error::Details, + schema::{EnumSchema, UnionSchema}, + util::zag_i32, +}; /// Deserializer for plain enums. pub struct PlainEnumDeserializer<'s, 'r, R: Read> { diff --git a/avro/src/serde/deser_schema/identifier.rs b/avro/src/serde/deser_schema/identifier.rs index de51f1a..8c1b468 100644 --- a/avro/src/serde/deser_schema/identifier.rs +++ b/avro/src/serde/deser_schema/identifier.rs @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::Error; -use crate::error::Details; -use serde::Deserializer; -use serde::de::Visitor; +use serde::{Deserializer, de::Visitor}; + +use crate::{Error, error::Details}; /// Deserializer that only accepts `deserialize_identifier` calls. pub enum IdentifierDeserializer<'s> { diff --git a/avro/src/serde/deser_schema/map.rs b/avro/src/serde/deser_schema/map.rs index 90508da..35dc37f 100644 --- a/avro/src/serde/deser_schema/map.rs +++ b/avro/src/serde/deser_schema/map.rs @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::MapSchema; -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; -use crate::util::{zag_i32, zag_i64}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, io::Read}; + use serde::de::{DeserializeSeed, MapAccess}; -use std::borrow::Borrow; -use std::io::Read; + +use crate::{ + Error, Schema, + schema::MapSchema, + serde::deser_schema::{Config, SchemaAwareDeserializer}, + util::{zag_i32, zag_i64}, +}; pub struct MapDeserializer<'s, 'r, R: Read, S: Borrow<Schema>> { reader: &'r mut R, diff --git a/avro/src/serde/deser_schema/mod.rs b/avro/src/serde/deser_schema/mod.rs index f7c8dc6..0b64de3 100644 --- a/avro/src/serde/deser_schema/mod.rs +++ b/avro/src/serde/deser_schema/mod.rs @@ -15,15 +15,17 @@ // specific language governing permissions and limitations // under the License. -use crate::decode::decode_len; -use crate::error::Details; -use crate::schema::{DecimalSchema, InnerDecimalSchema, Name, UnionSchema, UuidSchema}; -use crate::util::{zag_i32, zag_i64}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, collections::HashMap, io::Read}; + use serde::de::{Deserializer, Visitor}; -use std::borrow::Borrow; -use std::collections::HashMap; -use std::io::Read; + +use crate::{ + Error, Schema, + decode::decode_len, + error::Details, + schema::{DecimalSchema, InnerDecimalSchema, Name, UnionSchema, UuidSchema}, + util::{zag_i32, zag_i64}, +}; mod array; mod enums; @@ -32,13 +34,14 @@ mod map; mod record; mod tuple; -use crate::serde::deser_schema::enums::UnionEnumDeserializer; use array::ArrayDeserializer; use enums::PlainEnumDeserializer; use map::MapDeserializer; use record::RecordDeserializer; use tuple::{ManyTupleDeserializer, OneTupleDeserializer}; +use crate::serde::deser_schema::enums::UnionEnumDeserializer; + /// Configure the deserializer. #[derive(Debug)] pub struct Config<'s, S: Borrow<Schema>> { @@ -694,26 +697,21 @@ impl<'de, 's, 'r, R: Read, S: Borrow<Schema>> Deserializer<'de> fn deserialize_enum<V>( self, _name: &'static str, - variants: &'static [&'static str], + // This also includes aliases, so can't be used to check the amount of symbols + _variants: &'static [&'static str], visitor: V, ) -> Result<V::Value, Self::Error> where V: Visitor<'de>, { match self.schema { - Schema::Enum(schema) if schema.symbols.len() == variants.len() => { + Schema::Enum(schema) => { visitor.visit_enum(PlainEnumDeserializer::new(self.reader, schema)) } - Schema::Union(union) if union.variants().len() == variants.len() => { + Schema::Union(union) => { visitor.visit_enum(UnionEnumDeserializer::new(self.reader, union, self.config)) } - _ => Err(self.error( - "enum", - format!( - "Expected Schema::Enum(symbols.len() == {}) | Schema::Union(variants.len() == {}", - variants.len(), variants.len() - ), - )), + _ => Err(self.error("enum", "Expected Schema::Enum | Schema::Union")), } } @@ -743,20 +741,20 @@ impl<'de, 's, 'r, R: Read, S: Borrow<Schema>> Deserializer<'de> #[cfg(test)] mod tests { use std::fmt::Debug; - use std::io::Cursor; + use apache_avro_test_helper::TestResult; use num_bigint::BigInt; use pretty_assertions::assert_eq; - use serde::de::{DeserializeOwned, Visitor}; - use serde::{Deserialize, Serialize}; + use serde::{ + Deserialize, Serialize, + de::{DeserializeOwned, Visitor}, + }; use uuid::Uuid; - use apache_avro_test_helper::TestResult; - use super::*; - use crate::reader::datum::GenericDatumReader; - use crate::writer::datum::GenericDatumWriter; - use crate::{AvroResult, Decimal}; + use crate::{ + AvroResult, Decimal, reader::datum::GenericDatumReader, writer::datum::GenericDatumWriter, + }; #[track_caller] fn assert_roundtrip<T>(value: T, schema: &Schema, schemata: Vec<&Schema>) -> AvroResult<()> @@ -771,7 +769,7 @@ mod tests { let decoded_value: T = GenericDatumReader::builder(schema) .writer_schemata(schemata)? .build()? - .read_deser(&mut Cursor::new(buf))?; + .read_deser(&mut &buf[..])?; assert_eq!(decoded_value, value); @@ -850,11 +848,11 @@ mod tests { )?; // Contains index 3 (4th symbol) - let data_with_unknown_index = vec![6u8]; + let data_with_unknown_index = &[6u8]; let error = GenericDatumReader::builder(&schema) .build()? - .read_deser::<AccessLog>(&mut Cursor::new(data_with_unknown_index)) + .read_deser::<AccessLog>(&mut &data_with_unknown_index[..]) .unwrap_err(); assert_eq!(error.to_string(), "Enum symbol index out of bounds: 3"); @@ -1134,7 +1132,7 @@ mod tests { let decoded_value: TestUnitUntaggedEnum = GenericDatumReader::builder(&schema) .build()? - .read_deser(&mut Cursor::new(buf))?; + .read_deser(&mut &buf[..])?; // Val2 cannot troundtrip. All unit variants are serialized to the same null. // This also doesn't roundtrip in serde_json. @@ -1570,14 +1568,14 @@ mod tests { assert_roundtrip(uuid, &schema, Vec::new())?; let buf = GenericDatumWriter::builder(&alt_schema) - // .human_readable(true) + .human_readable(true) .build()? .write_ser_to_vec(&uuid)?; let decoded_value: Uuid = GenericDatumReader::builder(&alt_schema) .human_readable(true) .build()? - .read_deser(&mut Cursor::new(buf))?; + .read_deser(&mut &buf[..])?; assert_eq!(decoded_value, uuid); @@ -1641,7 +1639,7 @@ mod tests { let decoded_value: Bytes = GenericDatumReader::builder(&schema) .build()? - .read_deser(&mut Cursor::new(buf))?; + .read_deser(&mut &buf[..])?; assert_eq!(decoded_value.0, expected_bytes); @@ -1651,7 +1649,7 @@ mod tests { let decoded_value: Option<Bytes> = GenericDatumReader::builder(&schema_union) .build()? - .read_deser(&mut Cursor::new(buf))?; + .read_deser(&mut &buf[..])?; assert_eq!(decoded_value.unwrap().0, expected_bytes); @@ -1677,7 +1675,7 @@ mod tests { let error = GenericDatumReader::builder(&schema) .build()? - .read_deser::<char>(&mut Cursor::new(buf)) + .read_deser::<char>(&mut &buf[..]) .unwrap_err(); assert_eq!( diff --git a/avro/src/serde/deser_schema/record.rs b/avro/src/serde/deser_schema/record.rs index 2414037..f5de602 100644 --- a/avro/src/serde/deser_schema/record.rs +++ b/avro/src/serde/deser_schema/record.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::RecordSchema; -use crate::serde::deser_schema::identifier::IdentifierDeserializer; -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, io::Read}; + use serde::de::{DeserializeSeed, MapAccess}; -use std::borrow::Borrow; -use std::io::Read; + +use crate::{ + Error, Schema, + schema::RecordSchema, + serde::deser_schema::{Config, SchemaAwareDeserializer, identifier::IdentifierDeserializer}, +}; pub struct RecordDeserializer<'s, 'r, R: Read, S: Borrow<Schema>> { reader: &'r mut R, diff --git a/avro/src/serde/deser_schema/tuple.rs b/avro/src/serde/deser_schema/tuple.rs index 5b68746..cd11cde 100644 --- a/avro/src/serde/deser_schema/tuple.rs +++ b/avro/src/serde/deser_schema/tuple.rs @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::RecordSchema; -use crate::serde::deser_schema::{Config, SchemaAwareDeserializer}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, io::Read}; + use serde::de::{DeserializeSeed, SeqAccess}; -use std::borrow::Borrow; -use std::io::Read; + +use crate::{ + Error, Schema, + schema::RecordSchema, + serde::deser_schema::{Config, SchemaAwareDeserializer}, +}; pub struct OneTupleDeserializer<'s, 'r, R: Read, S: Borrow<Schema>> { reader: &'r mut R, diff --git a/avro/src/serde/ser_schema/block.rs b/avro/src/serde/ser_schema/block.rs index e15b486..11dcffb 100644 --- a/avro/src/serde/ser_schema/block.rs +++ b/avro/src/serde/ser_schema/block.rs @@ -15,16 +15,21 @@ // specific language governing permissions and limitations // under the License. +use std::{borrow::Borrow, io::Write}; + +use serde::{ + Serialize, + ser::{SerializeMap, SerializeSeq}, +}; + use super::{Config, SchemaAwareSerializer}; -use crate::encode::encode_int; -use crate::error::Details; -use crate::schema::{ArraySchema, MapSchema}; -use crate::util::zig_i32; -use crate::{Error, Schema}; -use serde::Serialize; -use serde::ser::{SerializeMap, SerializeSeq}; -use std::borrow::Borrow; -use std::io::Write; +use crate::{ + Error, Schema, + encode::encode_int, + error::Details, + schema::{ArraySchema, MapSchema}, + util::zig_i32, +}; #[expect( private_interfaces, diff --git a/avro/src/serde/ser_schema/mod.rs b/avro/src/serde/ser_schema/mod.rs index c971bff..d6975e2 100644 --- a/avro/src/serde/ser_schema/mod.rs +++ b/avro/src/serde/ser_schema/mod.rs @@ -22,25 +22,25 @@ mod record; mod tuple; mod union; -use crate::error::Details; -use crate::schema::{ - DecimalSchema, InnerDecimalSchema, MapSchema, Name, RecordSchema, SchemaKind, UnionSchema, - UuidSchema, -}; -use crate::util::{zig_i32, zig_i64}; -use crate::{Error, Schema}; -use serde::ser::SerializeMap; -use serde::{Serialize, Serializer}; -use serde_json::Value::Bool; -use std::borrow::Borrow; -use std::collections::HashMap; -use std::io::Write; +use std::{borrow::Borrow, collections::HashMap, io::Write}; use block::BlockSerializer; use record::RecordSerializer; +use serde::{Serialize, Serializer, ser::SerializeMap}; +use serde_json::Value::Bool; use tuple::{ManyTupleSerializer, TupleSerializer}; use union::UnionSerializer; +use crate::{ + Error, Schema, + error::Details, + schema::{ + DecimalSchema, InnerDecimalSchema, MapSchema, Name, RecordSchema, SchemaKind, UnionSchema, + UuidSchema, + }, + util::{zig_i32, zig_i64}, +}; + pub struct Config<'s, S: Borrow<Schema>> { /// Any references in the schema will be resolved using this map. /// @@ -726,21 +726,26 @@ impl<'s, 'w, W: Write, S: Borrow<Schema>> SerializeMap for MapOrRecordSerializer #[cfg(test)] mod tests { - use super::*; - use crate::schema::FixedSchema; - use crate::{Days, Duration, Millis, Months, decimal::Decimal, schema::ResolvedSchema}; + use std::{ + collections::{BTreeMap, HashMap}, + marker::PhantomData, + }; + use apache_avro_test_helper::TestResult; use bigdecimal::BigDecimal; use num_bigint::{BigInt, Sign}; use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; use serde_bytes::Bytes; - use std::{ - collections::{BTreeMap, HashMap}, - marker::PhantomData, - }; use uuid::Uuid; + use super::*; + use crate::{ + Days, Duration, Millis, Months, + decimal::Decimal, + schema::{FixedSchema, ResolvedSchema}, + }; + #[track_caller] fn assert_serialize_err<T: Serialize>( t: T, @@ -1928,6 +1933,9 @@ mod tests { struct Foo { a: String, b: String, + c: i64, + d: f64, + e: i64, } let schema = Schema::parse_str( r#" @@ -1942,6 +1950,18 @@ mod tests { { "name":"a", "type":"string" + }, + { + "name":"d", + "type":"double" + }, + { + "name":"e", + "type":"long" + }, + { + "name":"c", + "type":"long" } ] } @@ -1952,13 +1972,19 @@ mod tests { let foo = Foo { a: "Hello".into(), b: "World".into(), + c: 42, + d: std::f64::consts::PI, + e: 5, }; - // Serializing fields out of order is NOT supported - assert_serialize_err( + + assert_serialize( foo, &schema, &names, - r#"Missing default for skipped field 'b' of schema RecordSchema { name: Name { name: "Foo", .. }, fields: [RecordField { name: "b", schema: String, .. }, RecordField { name: "a", schema: String, .. }], .. }"#, + &[ + 10, b'W', b'o', b'r', b'l', b'd', 10, b'H', b'e', b'l', b'l', b'o', 24, 45, 68, 84, + 251, 33, 9, 64, 10, 84, + ], ); Ok(()) diff --git a/avro/src/serde/ser_schema/record/field_default.rs b/avro/src/serde/ser_schema/record/field_default.rs index 14716e8..55dc459 100644 --- a/avro/src/serde/ser_schema/record/field_default.rs +++ b/avro/src/serde/ser_schema/record/field_default.rs @@ -1,9 +1,12 @@ -use crate::Schema; -use crate::schema::{SchemaKind, UnionSchema, UuidSchema}; -use crate::serde::ser_schema::SERIALIZING_SCHEMA_DEFAULT; use serde::{Serialize, Serializer, ser::Error}; use serde_json::Value; +use crate::{ + Schema, + schema::{SchemaKind, UnionSchema, UuidSchema}, + serde::ser_schema::SERIALIZING_SCHEMA_DEFAULT, +}; + pub struct SchemaAwareRecordFieldDefault<'v, 's> { value: &'v Value, schema: &'s Schema, diff --git a/avro/src/serde/ser_schema/record/mod.rs b/avro/src/serde/ser_schema/record/mod.rs index 0d44828..ee20d05 100644 --- a/avro/src/serde/ser_schema/record/mod.rs +++ b/avro/src/serde/ser_schema/record/mod.rs @@ -17,21 +17,31 @@ mod field_default; +use std::{borrow::Borrow, cmp::Ordering, collections::HashMap, io::Write}; + +use serde::{ + Serialize, + ser::{SerializeMap, SerializeStruct, SerializeStructVariant}, +}; + use super::{Config, SchemaAwareSerializer}; -use crate::error::Details; -use crate::schema::RecordSchema; -use crate::serde::ser_schema::record::field_default::SchemaAwareRecordFieldDefault; -use crate::serde::util::StringSerializer; -use crate::{Error, Schema}; -use serde::Serialize; -use serde::ser::{SerializeMap, SerializeStruct, SerializeStructVariant}; -use std::borrow::Borrow; -use std::io::Write; +use crate::{ + Error, Schema, + error::Details, + schema::RecordSchema, + serde::{ + ser_schema::record::field_default::SchemaAwareRecordFieldDefault, util::StringSerializer, + }, +}; pub struct RecordSerializer<'s, 'w, W: Write, S: Borrow<Schema>> { writer: &'w mut W, record: &'s RecordSchema, config: Config<'s, S>, + /// Cache fields received out-of-order + cache: HashMap<usize, Vec<u8>>, + /// The position of the current map entry being written + map_position: Option<usize>, /// The field that should be written now. field_position: usize, bytes_written: usize, @@ -48,6 +58,8 @@ impl<'s, 'w, W: Write, S: Borrow<Schema>> RecordSerializer<'s, 'w, W, S> { writer, record, config, + cache: HashMap::new(), + map_position: None, field_position: 0, bytes_written: bytes_written.unwrap_or(0), } @@ -89,21 +101,49 @@ impl<'s, 'w, W: Write, S: Borrow<Schema>> RecordSerializer<'s, 'w, W, S> { position: usize, value: &T, ) -> Result<(), Error> { - // Serialize any skipped fields using their default value - while self.field_position < position { - self.serialize_default(self.field_position)?; - } let field = &self.record.fields[position]; - self.bytes_written += value - .serialize(SchemaAwareSerializer::new( - self.writer, - &field.schema, - self.config, - )?) - .map_err(|e| self.field_error(self.field_position, e))?; - self.field_position += 1; - - Ok(()) + match self.field_position.cmp(&position) { + Ordering::Equal => { + // Field received in the right order + self.bytes_written += value + .serialize(SchemaAwareSerializer::new( + self.writer, + &field.schema, + self.config, + )?) + .map_err(|e| self.field_error(self.field_position, e))?; + self.field_position += 1; + + // Write any fields that were already received and can now be written + while let Some(bytes) = self.cache.remove(&self.field_position) { + self.writer.write_all(&bytes).map_err(Details::WriteBytes)?; + self.bytes_written += bytes.len(); + self.field_position += 1; + } + + Ok(()) + } + Ordering::Less => { + // Another field needs to be written first, so cache this field + let mut bytes = Vec::new(); + value + .serialize(SchemaAwareSerializer::new( + &mut bytes, + &field.schema, + self.config, + )?) + .map_err(|e| self.field_error(self.field_position, e))?; + if self.cache.insert(position, bytes).is_some() { + Err(Details::FieldNameDuplicate(field.name.clone()).into()) + } else { + Ok(()) + } + } + Ordering::Greater => { + // This field is already written to the writer so we got a duplicate + Err(Details::FieldNameDuplicate(field.name.clone()).into()) + } + } } fn serialize_default(&mut self, position: usize) -> Result<(), Error> { @@ -171,10 +211,7 @@ impl<'s, 'w, W: Write, S: Borrow<Schema>> SerializeMap for RecordSerializer<'s, { let name = key.serialize(StringSerializer)?; if let Some(position) = self.record.lookup.get(&name).copied() { - // Write any skipped fields now, so that `serialize_value` can just call `serialize_next_field` - while self.field_position < position { - self.serialize_default(self.field_position)?; - } + self.map_position = Some(position); Ok(()) } else { Err(Details::FieldName(name.to_string()).into()) @@ -185,13 +222,29 @@ impl<'s, 'w, W: Write, S: Borrow<Schema>> SerializeMap for RecordSerializer<'s, where T: ?Sized + Serialize, { - // `serialize_key` made sure that all skipped fields have been written - self.serialize_next_field(self.field_position, value) + self.serialize_next_field( + self.map_position + .expect("serialze_value called without calling serialize_key"), + value, + ) } fn end(self) -> Result<Self::Ok, Self::Error> { self.end() } + + fn serialize_entry<K, V>(&mut self, key: &K, value: &V) -> Result<(), Self::Error> + where + K: ?Sized + Serialize, + V: ?Sized + Serialize, + { + let name = key.serialize(StringSerializer)?; + if let Some(position) = self.record.lookup.get(&name).copied() { + self.serialize_next_field(position, value) + } else { + Err(Details::FieldName(name.to_string()).into()) + } + } } impl<'s, 'w, W: Write, S: Borrow<Schema>> SerializeStructVariant diff --git a/avro/src/serde/ser_schema/tuple.rs b/avro/src/serde/ser_schema/tuple.rs index 7aa2a79..dc9dd0d 100644 --- a/avro/src/serde/ser_schema/tuple.rs +++ b/avro/src/serde/ser_schema/tuple.rs @@ -15,14 +15,19 @@ // specific language governing permissions and limitations // under the License. +use std::{borrow::Borrow, io::Write}; + +use serde::{ + Serialize, + ser::{SerializeTuple, SerializeTupleStruct, SerializeTupleVariant}, +}; + use super::{Config, SchemaAwareSerializer, union::UnionSerializer}; -use crate::error::Details; -use crate::schema::{RecordSchema, UnionSchema}; -use crate::{Error, Schema}; -use serde::Serialize; -use serde::ser::{SerializeTuple, SerializeTupleStruct, SerializeTupleVariant}; -use std::borrow::Borrow; -use std::io::Write; +use crate::{ + Error, Schema, + error::Details, + schema::{RecordSchema, UnionSchema}, +}; #[expect( private_interfaces, diff --git a/avro/src/serde/ser_schema/union.rs b/avro/src/serde/ser_schema/union.rs index a893fb5..c6dd0ea 100644 --- a/avro/src/serde/ser_schema/union.rs +++ b/avro/src/serde/ser_schema/union.rs @@ -15,18 +15,25 @@ // specific language governing permissions and limitations // under the License. -use super::{Config, MapOrRecordSerializer, SchemaAwareSerializer}; -use crate::error::Details; -use crate::schema::{FixedSchema, SchemaKind, UnionSchema}; -use crate::serde::ser_schema::block::BlockSerializer; -use crate::serde::ser_schema::record::RecordSerializer; -use crate::serde::ser_schema::tuple::{ManyTupleSerializer, TupleSerializer}; -use crate::serde::with::{BytesType, SER_BYTES_TYPE}; -use crate::util::{zig_i32, zig_i64}; -use crate::{Error, Schema}; +use std::{borrow::Borrow, io::Write}; + use serde::{Serialize, Serializer}; -use std::borrow::Borrow; -use std::io::Write; + +use super::{Config, MapOrRecordSerializer, SchemaAwareSerializer}; +use crate::{ + Error, Schema, + error::Details, + schema::{FixedSchema, SchemaKind, UnionSchema}, + serde::{ + ser_schema::{ + block::BlockSerializer, + record::RecordSerializer, + tuple::{ManyTupleSerializer, TupleSerializer}, + }, + with::{BytesType, SER_BYTES_TYPE}, + }, + util::{zig_i32, zig_i64}, +}; pub struct UnionSerializer<'s, 'w, W: Write, S: Borrow<Schema>> { writer: &'w mut W, diff --git a/avro/src/serde/with.rs b/avro/src/serde/with.rs index 74c0a3a..893940e 100644 --- a/avro/src/serde/with.rs +++ b/avro/src/serde/with.rs @@ -93,15 +93,14 @@ impl Drop for BorrowedGuard { /// /// [`apache_avro::serde::bytes_opt`]: bytes_opt pub mod bytes { - use super::BytesType; use std::collections::HashSet; use serde::{Deserializer, Serializer}; - use crate::schema::NamespaceRef; + use super::BytesType; use crate::{ Schema, - schema::{Name, RecordField}, + schema::{Name, NamespaceRef, RecordField}, }; /// Returns [`Schema::Bytes`] @@ -160,14 +159,14 @@ pub mod bytes { /// /// [`apache_avro::serde::bytes`]: bytes pub mod bytes_opt { - use super::BytesType; - use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; - use crate::schema::NamespaceRef; + use serde::{Deserializer, Serializer}; + + use super::BytesType; use crate::{ Schema, - schema::{Name, RecordField, UnionSchema}, + schema::{Name, NamespaceRef, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` @@ -231,13 +230,12 @@ pub mod bytes_opt { pub mod fixed { use std::collections::HashSet; - use super::BytesType; use serde::{Deserializer, Serializer}; - use crate::schema::NamespaceRef; + use super::BytesType; use crate::{ Schema, - schema::{FixedSchema, Name, RecordField}, + schema::{FixedSchema, Name, NamespaceRef, RecordField}, }; /// Returns `Schema::Fixed(N)` named `serde_avro_fixed_{N}` @@ -310,14 +308,14 @@ pub mod fixed { /// /// [`apache_avro::serde::fixed`]: fixed pub mod fixed_opt { - use super::BytesType; - use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; - use crate::schema::NamespaceRef; + use serde::{Deserializer, Serializer}; + + use super::BytesType; use crate::{ Schema, - schema::{Name, RecordField, UnionSchema}, + schema::{Name, NamespaceRef, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Fixed(N))` where the fixed schema is named `serde_avro_fixed_{N}` @@ -388,15 +386,14 @@ pub mod fixed_opt { /// [`Value::Fixed`]: crate::types::Value::Fixed /// [`apache_avro::serde::slice_opt`]: slice_opt pub mod slice { - use super::BytesType; use std::collections::HashSet; use serde::{Deserializer, Serializer}; - use crate::schema::NamespaceRef; + use super::BytesType; use crate::{ Schema, - schema::{Name, RecordField}, + schema::{Name, NamespaceRef, RecordField}, }; /// Returns [`Schema::Bytes`] @@ -458,14 +455,14 @@ pub mod slice { /// [`Value::Fixed`]: crate::types::Value::Fixed /// [`apache_avro::serde::slice`]: mod@slice pub mod slice_opt { - use super::BytesType; - use serde::{Deserializer, Serializer}; use std::{borrow::Borrow, collections::HashSet}; - use crate::schema::NamespaceRef; + use serde::{Deserializer, Serializer}; + + use super::BytesType; use crate::{ Schema, - schema::{Name, RecordField, UnionSchema}, + schema::{Name, NamespaceRef, RecordField, UnionSchema}, }; /// Returns `Schema::Union(Schema::Null, Schema::Bytes)` @@ -677,13 +674,17 @@ pub mod bigdecimal_opt { /// [`apache_avro::serde::array_opt`]: array_opt /// [`Schema::Array`]: crate::schema::Schema::Array pub mod array { + use std::collections::HashSet; + + use serde::{ + Deserialize, Deserializer, Serialize, Serializer, + de::{DeserializeOwned, Error as _}, + }; + use crate::{ AvroSchemaComponent, Schema, schema::{Name, NamespaceRef, RecordField}, }; - use serde::de::DeserializeOwned; - use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error as _}; - use std::collections::HashSet; /// Returns `Schema::Array(T::get_schema_in_ctxt())` pub fn get_schema_in_ctxt<T: AvroSchemaComponent>( @@ -746,9 +747,10 @@ pub mod array { /// /// [`apache_avro::serde::array`]: mod@array pub mod array_opt { - use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error as _}; use std::collections::HashSet; + use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error as _}; + use crate::{ AvroSchemaComponent, Schema, schema::{Name, NamespaceRef, RecordField, UnionSchema}, @@ -814,9 +816,10 @@ pub mod array_opt { #[cfg(test)] mod tests { - use crate::{Schema, from_value, to_value, types::Value}; use serde::{Deserialize, Serialize}; + use crate::{Schema, from_value, to_value, types::Value}; + #[test] fn avro_3631_validate_schema_for_struct_with_byte_types() { #[derive(Debug, Serialize)] diff --git a/avro/src/writer/datum.rs b/avro/src/writer/datum.rs index 3f79502..2fa91d9 100644 --- a/avro/src/writer/datum.rs +++ b/avro/src/writer/datum.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. +use std::io::Write; + use bon::bon; use serde::Serialize; -use std::io::Write; use crate::{ AvroResult, Schema, @@ -241,9 +242,9 @@ mod tests { use apache_avro_test_helper::TestResult; use super::*; - use crate::reader::datum::GenericDatumReader; use crate::{ Days, Decimal, Duration, Millis, Months, + reader::datum::GenericDatumReader, schema::{DecimalSchema, FixedSchema, InnerDecimalSchema, Name}, types::Record, util::zig_i64, diff --git a/avro/src/writer/mod.rs b/avro/src/writer/mod.rs index b11bbde..2077fd2 100644 --- a/avro/src/writer/mod.rs +++ b/avro/src/writer/mod.rs @@ -16,6 +16,10 @@ // under the License. //! Logic handling writing in Avro format at user level. +use std::{collections::HashMap, io::Write, mem::ManuallyDrop}; + +use serde::Serialize; + use crate::{ AvroResult, Codec, Error, encode::{encode, encode_internal, encode_to_vec}, @@ -25,8 +29,6 @@ use crate::{ types::Value, util::is_human_readable, }; -use serde::Serialize; -use std::{collections::HashMap, io::Write, mem::ManuallyDrop}; pub mod datum; pub mod single_object; @@ -656,13 +658,12 @@ fn generate_sync_marker() -> [u8; 16] { mod tests { use std::{cell::RefCell, rc::Rc}; - use super::*; - use crate::{Reader, types::Record, util::zig_i64}; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; - use crate::{codec::DeflateSettings, error::Details}; - use apache_avro_test_helper::TestResult; + use super::*; + use crate::{Reader, codec::DeflateSettings, error::Details, types::Record, util::zig_i64}; const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len(); diff --git a/avro/src/writer/single_object.rs b/avro/src/writer/single_object.rs index d34340c..528ceaf 100644 --- a/avro/src/writer/single_object.rs +++ b/avro/src/writer/single_object.rs @@ -20,16 +20,15 @@ use std::{io::Write, marker::PhantomData, ops::RangeInclusive}; use bon::Builder; use serde::Serialize; -use crate::Error; -use crate::encode::encode_internal; -use crate::serde::ser_schema::{Config, SchemaAwareSerializer}; -use crate::util::is_human_readable; use crate::{ - AvroResult, AvroSchema, Schema, + AvroResult, AvroSchema, Error, Schema, + encode::encode_internal, error::Details, headers::{HeaderBuilder, RabinFingerprintHeader}, schema::ResolvedOwnedSchema, + serde::ser_schema::{Config, SchemaAwareSerializer}, types::Value, + util::is_human_readable, }; /// Writer that encodes messages according to the single object encoding v1 spec @@ -251,9 +250,8 @@ mod tests { use apache_avro_test_helper::TestResult; use uuid::Uuid; - use crate::{encode::encode, headers::GlueSchemaUuidHeader, rabin::Rabin}; - use super::*; + use crate::{encode::encode, headers::GlueSchemaUuidHeader, rabin::Rabin}; #[derive(Serialize, Clone)] struct TestSingleObjectWriter { diff --git a/avro/tests/avro-rs-226.rs b/avro/tests/avro-rs-226.rs index fd1b6d2..1e49cd4 100644 --- a/avro/tests/avro-rs-226.rs +++ b/avro/tests/avro-rs-226.rs @@ -15,26 +15,25 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{AvroSchema, Schema, Writer, from_value}; +use std::fmt::Debug; + +use apache_avro::{ + AvroSchema, Schema, reader::datum::GenericDatumReader, writer::datum::GenericDatumWriter, +}; use apache_avro_test_helper::TestResult; use serde::{Deserialize, Serialize, de::DeserializeOwned}; -use std::fmt::Debug; fn ser_deser<T>(schema: &Schema, record: T) -> TestResult where T: Serialize + DeserializeOwned + Debug + PartialEq + Clone, { - let record2 = record.clone(); - let mut writer = Writer::new(schema, vec![])?; - writer.append_ser(record)?; - let bytes_written = writer.into_inner()?; - - let reader = apache_avro::Reader::new(&bytes_written[..])?; - for value in reader { - let value = value?; - let deserialized = from_value::<T>(&value)?; - assert_eq!(deserialized, record2); - } + let serialized = GenericDatumWriter::builder(schema) + .build()? + .write_ser_to_vec(&record)?; + let deserialized: T = GenericDatumReader::builder(schema) + .build()? + .read_deser(&mut &serialized[..])?; + assert_eq!(deserialized, record); Ok(()) } diff --git a/avro/tests/avro-rs-285-bytes_deserialization.rs b/avro/tests/avro-rs-285-bytes_deserialization.rs index b9ebe80..5dd5e4e 100644 --- a/avro/tests/avro-rs-285-bytes_deserialization.rs +++ b/avro/tests/avro-rs-285-bytes_deserialization.rs @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. +use apache_avro::Reader; use apache_avro_test_helper::TestResult; use serde::{Deserialize, Serialize}; #[derive(Debug, Deserialize, PartialEq, Serialize)] +#[serde(rename = "SimpleRecord")] struct ExampleByteArray { #[serde(with = "apache_avro::serde::bytes_opt")] data_bytes: Option<Vec<u8>>, @@ -26,6 +28,7 @@ struct ExampleByteArray { } #[derive(Deserialize, Serialize)] +#[serde(rename = "SimpleRecord")] struct ExampleByteArrayFiltered { description: Option<String>, } @@ -70,10 +73,9 @@ fn avro_rs_285_bytes_deserialization_round_trip() -> TestResult { let avro_data = writer.into_inner()?; // deserialize Avro binary data back into ExampleByteArray structs - let reader = apache_avro::Reader::new(&avro_data[..])?; - let deserialized_records: Vec<ExampleByteArray> = reader - .map(|value| apache_avro::from_value::<ExampleByteArray>(&value.unwrap()).unwrap()) - .collect(); + let deserialized_records = Reader::new(&avro_data[..])? + .into_deser_iter() + .collect::<Result<Vec<ExampleByteArray>, _>>()?; assert_eq!(records, deserialized_records); Ok(()) @@ -118,10 +120,9 @@ fn avro_rs_285_bytes_deserialization_filtered_round_trip() -> TestResult { let avro_data = writer.into_inner()?; // deserialize Avro binary data back into ExampleByteArrayFiltered structs - let reader = apache_avro::Reader::new(&avro_data[..])?; - let deserialized_records: Vec<ExampleByteArrayFiltered> = reader - .map(|value| apache_avro::from_value::<ExampleByteArrayFiltered>(&value.unwrap()).unwrap()) - .collect(); + let deserialized_records = Reader::new(&avro_data[..])? + .into_deser_iter() + .collect::<Result<Vec<ExampleByteArrayFiltered>, _>>()?; assert_eq!(records.len(), deserialized_records.len()); diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index 89e328a..9f3294b 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -15,15 +15,19 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::writer::datum::GenericDatumWriter; +use std::{ + collections::HashMap, + io::{Cursor, Read}, +}; + use apache_avro::{ Codec, Error, Reader, Schema, Writer, error::Details, - from_value, reader::datum::GenericDatumReader, schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, to_value, types::{Record, Value}, + writer::datum::GenericDatumWriter, }; use apache_avro_test_helper::{ TestResult, @@ -31,10 +35,6 @@ use apache_avro_test_helper::{ init, }; use serde::{Deserialize, Serialize}; -use std::{ - collections::HashMap, - io::{Cursor, Read}, -}; #[test] fn test_correct_recursive_extraction() -> TestResult { @@ -837,7 +837,7 @@ fn avro_old_issue_47() -> TestResult { let schema_str = r#" { "type": "record", - "name": "my_record", + "name": "MyRecord", "fields": [ {"name": "a", "type": "long"}, {"name": "b", "type": "string"} @@ -858,15 +858,13 @@ fn avro_old_issue_47() -> TestResult { a: 1, }; - let ser_value = to_value(record.clone())?; let serialized_bytes = GenericDatumWriter::builder(&schema) .build()? - .write_value_to_vec(ser_value)?; + .write_ser_to_vec(&record)?; - let de_value = GenericDatumReader::builder(&schema) + let deserialized_record: MyRecord = GenericDatumReader::builder(&schema) .build()? - .read_value(&mut &*serialized_bytes)?; - let deserialized_record = from_value::<MyRecord>(&de_value)?; + .read_deser(&mut &serialized_bytes[..])?; assert_eq!(record, deserialized_record); Ok(()) diff --git a/avro/tests/union_schema.rs b/avro/tests/union_schema.rs index 892b69b..e1b7fa0 100644 --- a/avro/tests/union_schema.rs +++ b/avro/tests/union_schema.rs @@ -15,7 +15,11 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{AvroResult, Codec, Reader, Schema, Writer, from_value}; +use std::fmt::Debug; + +use apache_avro::{Schema, reader::datum::GenericDatumReader, writer::datum::GenericDatumWriter}; +use apache_avro_test_helper::TestResult; +use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize, de::DeserializeOwned}; static SCHEMA_A_STR: &str = r#"{ @@ -65,26 +69,25 @@ struct C { field_c: String, } -fn encode_decode<T>(input: &T, schema: &Schema, schemata: &[Schema]) -> AvroResult<T> +#[track_caller] +fn assert_roundtrip<T>(input: &T, schema: &Schema, schemata: &[Schema]) -> TestResult where - T: DeserializeOwned + Serialize, + T: DeserializeOwned + Serialize + PartialEq + Debug, { - let mut encoded: Vec<u8> = Vec::new(); - let mut writer = - Writer::with_schemata(schema, schemata.iter().collect(), &mut encoded, Codec::Null)?; - writer.append_ser(input)?; - writer.flush()?; - drop(writer); //drop the writer so that `encoded` is no more referenced mutably - - let mut reader = Reader::builder(encoded.as_slice()) - .reader_schema(schema) - .schemata(schemata.iter().collect()) - .build()?; - from_value::<T>(&reader.next().expect("")?) + let serialized = GenericDatumWriter::builder(schema) + .schemata(schemata.iter().collect())? + .build()? + .write_ser_to_vec(input)?; + let deserialized: T = GenericDatumReader::builder(schema) + .writer_schemata(schemata.iter().collect())? + .build()? + .read_deser(&mut &serialized[..])?; + assert_eq!(&deserialized, input); + Ok(()) } #[test] -fn test_avro_3901_union_schema_round_trip_no_null() -> AvroResult<()> { +fn test_avro_3901_union_schema_round_trip_no_null() -> TestResult { let schemata: Vec<Schema> = Schema::parse_list([SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_C_STR]).expect("parsing schemata"); @@ -92,15 +95,13 @@ fn test_avro_3901_union_schema_round_trip_no_null() -> AvroResult<()> { field_union: (UnionAB::A(A { field_a: 45.5 })), field_c: "foo".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = C { field_union: (UnionAB::B(B { field_b: 73 })), field_c: "bar".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; Ok(()) } @@ -128,7 +129,7 @@ struct D { } #[test] -fn test_avro_3901_union_schema_round_trip_null_at_start() -> AvroResult<()> { +fn test_avro_3901_union_schema_round_trip_null_at_start() -> TestResult { let schemata: Vec<Schema> = Schema::parse_list([SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_D_STR]).expect("parsing schemata"); @@ -136,22 +137,19 @@ fn test_avro_3901_union_schema_round_trip_null_at_start() -> AvroResult<()> { field_union: UnionNoneAB::A(A { field_a: 54.25 }), field_d: "fooy".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = D { field_union: UnionNoneAB::None, field_d: "fooyy".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = D { field_union: UnionNoneAB::B(B { field_b: 103 }), field_d: "foov".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; Ok(()) } @@ -179,7 +177,7 @@ struct E { } #[test] -fn test_avro_3901_union_schema_round_trip_with_out_of_order_null() -> AvroResult<()> { +fn test_avro_3901_union_schema_round_trip_with_out_of_order_null() -> TestResult { let schemata: Vec<Schema> = Schema::parse_list([SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_E_STR]).expect("parsing schemata"); @@ -187,22 +185,19 @@ fn test_avro_3901_union_schema_round_trip_with_out_of_order_null() -> AvroResult field_union: UnionANoneB::A(A { field_a: 23.75 }), field_e: "barme".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = E { field_union: UnionANoneB::None, field_e: "barme2".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = E { field_union: UnionANoneB::B(B { field_b: 89 }), field_e: "barme3".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; Ok(()) } @@ -230,7 +225,7 @@ struct F { } #[test] -fn test_avro_3901_union_schema_round_trip_with_end_null() -> AvroResult<()> { +fn test_avro_3901_union_schema_round_trip_with_end_null() -> TestResult { let schemata: Vec<Schema> = Schema::parse_list([SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_F_STR]).expect("parsing schemata"); @@ -238,22 +233,19 @@ fn test_avro_3901_union_schema_round_trip_with_end_null() -> AvroResult<()> { field_union: UnionABNone::A(A { field_a: 23.75 }), field_f: "aoe".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = F { field_union: UnionABNone::B(B { field_b: 89 }), field_f: "aoe3".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; let input = F { field_union: UnionABNone::None, field_f: "aoee2".to_string(), }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[2], &schemata)?; Ok(()) } @@ -321,22 +313,20 @@ struct H { } #[test] -fn test_avro_3901_union_schema_as_optional() -> AvroResult<()> { +fn test_avro_3901_union_schema_as_optional() -> TestResult { let schemata: Vec<Schema> = Schema::parse_list([SCHEMA_H_STR]).expect("parsing schemata"); let input = H { field_union: Some(23), field_h: "aaa".to_string(), }; - let output = encode_decode(&input, &schemata[0], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[0], &schemata)?; let input = H { field_union: None, field_h: "bbb".to_string(), }; - let output = encode_decode(&input, &schemata[0], &schemata)?; - assert_eq!(input, output); + assert_roundtrip(&input, &schemata[0], &schemata)?; Ok(()) } diff --git a/avro_derive/tests/derive.rs b/avro_derive/tests/derive.rs index 66d179e..d6af1d9 100644 --- a/avro_derive/tests/derive.rs +++ b/avro_derive/tests/derive.rs @@ -15,22 +15,23 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{ - AvroSchema, AvroSchemaComponent, Reader, Schema, Writer, from_value, - schema::{Alias, EnumSchema, FixedSchema, Name, RecordSchema}, -}; -use proptest::prelude::*; -use serde::{Deserialize, Serialize, de::DeserializeOwned}; use std::{ borrow::Cow, collections::{HashMap, HashSet}, sync::Mutex, time::Duration, }; -use uuid::Uuid; -use apache_avro::schema::NamespaceRef; +use apache_avro::{ + AvroSchema, AvroSchemaComponent, Schema, Writer, + reader::datum::GenericDatumReader, + schema::{Alias, EnumSchema, FixedSchema, Name, NamespaceRef, RecordSchema}, + writer::datum::GenericDatumWriter, +}; use pretty_assertions::assert_eq; +use proptest::prelude::*; +use serde::{Deserialize, Serialize, de::DeserializeOwned}; +use uuid::Uuid; /// Takes in a type that implements the right combination of traits and runs it through a Serde Cycle and asserts the result is the same #[track_caller] @@ -55,11 +56,11 @@ where T: Serialize + AvroSchema, { let schema = T::get_schema(); - let mut writer = Writer::new(&schema, Vec::new()).unwrap(); - if let Err(e) = writer.append_ser(obj) { - panic!("{e:?}"); - } - writer.into_inner().unwrap() + GenericDatumWriter::builder(&schema) + .build() + .unwrap() + .write_ser_to_vec(&obj) + .unwrap() } #[track_caller] @@ -69,19 +70,11 @@ where { assert!(!encoded.is_empty()); let schema = T::get_schema(); - let mut reader = Reader::builder(&encoded[..]) - .reader_schema(&schema) + GenericDatumReader::builder(&schema) .build() - .unwrap(); - if let Some(res) = reader.next() { - match res { - Ok(value) => { - return from_value::<T>(&value).unwrap(); - } - Err(e) => panic!("{e:?}"), - } - } - unreachable!() + .unwrap() + .read_deser(&mut &encoded[..]) + .unwrap() } #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] @@ -1873,8 +1866,9 @@ fn avro_rs_397_with() { } mod module { - use super::*; use apache_avro::schema::NamespaceRef; + + use super::*; pub fn get_schema_in_ctxt( _named_schemas: &mut HashSet<Name>, _enclosing_namespace: NamespaceRef, diff --git a/avro_derive/tests/serde.rs b/avro_derive/tests/serde.rs index 448d59f..4970d9e 100644 --- a/avro_derive/tests/serde.rs +++ b/avro_derive/tests/serde.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{AvroSchema, Error, Reader, Schema, Writer, from_value}; +use apache_avro::{ + AvroSchema, Error, Schema, reader::datum::GenericDatumReader, writer::datum::GenericDatumWriter, +}; use serde::{Deserialize, Serialize, de::DeserializeOwned}; /// Takes in a type that implements the right combination of traits and runs it through a Serde @@ -51,9 +53,9 @@ where T: Serialize + AvroSchema, { let schema = T::get_schema(); - let mut writer = Writer::new(&schema, Vec::new())?; - writer.append_ser(obj)?; - writer.into_inner() + GenericDatumWriter::builder(&schema) + .build()? + .write_ser_to_vec(&obj) } fn de<T>(encoded: Vec<u8>) -> Result<T, Error> @@ -62,13 +64,9 @@ where { assert!(!encoded.is_empty()); let schema = T::get_schema(); - let mut reader = Reader::builder(&encoded[..]) - .reader_schema(&schema) - .build()?; - if let Some(res) = reader.next() { - return res.and_then(|v| from_value::<T>(&v)); - } - panic!("Nothing was encoded!") + GenericDatumReader::builder(&schema) + .build()? + .read_deser(&mut &encoded[..]) } mod container_attributes {
