This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f18cf95 feat: Replace `to_datum*` functions with `GenericDatumWriter`
(#499)
f18cf95 is described below
commit f18cf9573574d4a4a54a4bedd620776527e8a7cd
Author: Kriskras99 <[email protected]>
AuthorDate: Thu Mar 5 07:05:51 2026 +0100
feat: Replace `to_datum*` functions with `GenericDatumWriter` (#499)
* feat: Replace `to_datum*` functions with `GenericDatumWriter`
* fmt
* fix: Improve documentation and fix resolving
* fix: `Value::validate_schemata` should not set `enclosing_namespace`
`enclosing_namespace` is the namespace of the parent schema, but
there is no parent schema here, so it should not be set.
---------
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: default <[email protected]>
---
avro/benches/single.rs | 67 +++++++-
avro/src/lib.rs | 6 +-
avro/src/schema/mod.rs | 9 +-
avro/src/serde/de.rs | 11 +-
avro/src/types.rs | 10 +-
avro/src/writer/datum.rs | 247 ++++++++++++++++++++----------
avro/src/writer/single_object.rs | 40 ++++-
avro/tests/avro-3786.rs | 27 +++-
avro/tests/avro-3787.rs | 11 +-
avro/tests/io.rs | 45 ++++--
avro/tests/schema.rs | 26 ++--
avro/tests/to_from_avro_datum_schemata.rs | 13 +-
12 files changed, 375 insertions(+), 137 deletions(-)
diff --git a/avro/benches/single.rs b/avro/benches/single.rs
index 05d8453..862a15f 100644
--- a/avro/benches/single.rs
+++ b/avro/benches/single.rs
@@ -15,12 +15,13 @@
// specific language governing permissions and limitations
// under the License.
+use apache_avro::writer::datum::GenericDatumWriter;
use apache_avro::{
schema::Schema,
- to_avro_datum,
types::{Record, Value},
};
use criterion::{Criterion, criterion_group, criterion_main};
+use std::time::Duration;
const RAW_SMALL_SCHEMA: &str = r#"
{
@@ -174,20 +175,74 @@ fn make_big_record() -> Result<(Schema, Value), Box<dyn
std::error::Error>> {
fn bench_small_schema_write_record(c: &mut Criterion) {
let (schema, record) = make_small_record().unwrap();
c.bench_function("small record", |b| {
- b.iter(|| to_avro_datum(&schema, record.clone()))
+ b.iter(|| {
+ GenericDatumWriter::builder(&schema)
+ .build()
+ .unwrap()
+ .write_value_to_vec(record.clone())
+ })
});
}
fn bench_big_schema_write_record(c: &mut Criterion) {
let (schema, record) = make_big_record().unwrap();
c.bench_function("big record", |b| {
- b.iter(|| to_avro_datum(&schema, record.clone()))
+ b.iter(|| {
+ GenericDatumWriter::builder(&schema)
+ .build()
+ .unwrap()
+ .write_value_to_vec(record.clone())
+ })
+ });
+}
+
+fn bench_small_schema_write_record_reuse_datum_writer(c: &mut Criterion) {
+ let (schema, record) = make_small_record().unwrap();
+ let writer = GenericDatumWriter::builder(&schema).build().unwrap();
+ c.bench_function("small record (reused writer)", |b| {
+ b.iter(|| writer.write_value_ref(&mut Vec::new(), &record))
+ });
+}
+
+fn bench_big_schema_write_record_reuse_datum_writer(c: &mut Criterion) {
+ let (schema, record) = make_big_record().unwrap();
+ let writer = GenericDatumWriter::builder(&schema).build().unwrap();
+ c.bench_function("big record (reused writer)", |b| {
+ b.iter(|| writer.write_value_ref(&mut Vec::new(), &record))
+ });
+}
+
+fn bench_small_schema_write_record_no_validation(c: &mut Criterion) {
+ let (schema, record) = make_small_record().unwrap();
+ let writer = GenericDatumWriter::builder(&schema)
+ .validate(false)
+ .build()
+ .unwrap();
+ c.bench_function("small record (no validation)", |b| {
+ b.iter(|| writer.write_value_ref(&mut Vec::new(), &record))
+ });
+}
+
+fn bench_big_schema_write_record_no_validation(c: &mut Criterion) {
+ let (schema, record) = make_big_record().unwrap();
+ let writer = GenericDatumWriter::builder(&schema)
+ .validate(false)
+ .build()
+ .unwrap();
+ c.bench_function("big record (no validation)", |b| {
+ b.iter(|| writer.write_value_ref(&mut Vec::new(), &record))
});
}
criterion_group!(
- benches,
- bench_small_schema_write_record,
- bench_big_schema_write_record
+ name = benches;
+ config =
Criterion::default().sample_size(200).measurement_time(Duration::from_secs(10));
+ targets =
+ bench_small_schema_write_record,
+ bench_big_schema_write_record,
+ bench_small_schema_write_record_reuse_datum_writer,
+ bench_big_schema_write_record_reuse_datum_writer,
+ bench_small_schema_write_record_no_validation,
+ bench_big_schema_write_record_no_validation,
);
criterion_main!(benches);
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index 3a2981f..f2f9975 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -57,7 +57,6 @@ mod decode;
mod duration;
mod encode;
mod reader;
-mod writer;
#[cfg(doc)]
pub mod documentation;
@@ -71,6 +70,7 @@ pub mod serde;
pub mod types;
pub mod util;
pub mod validator;
+pub mod writer;
#[expect(deprecated)]
pub use crate::{
@@ -98,6 +98,10 @@ pub use reader::{
pub use schema::Schema;
pub use serde::{AvroSchema, AvroSchemaComponent, from_value, to_value};
pub use uuid::Uuid;
+#[expect(
+ deprecated,
+ reason = "Still need to export it until we remove it completely"
+)]
pub use writer::{
Clearable, Writer, WriterBuilder,
datum::{to_avro_datum, to_avro_datum_schemata, write_avro_datum_ref},
diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs
index 3fd776c..91b0450 100644
--- a/avro/src/schema/mod.rs
+++ b/avro/src/schema/mod.rs
@@ -1168,6 +1168,7 @@ fn field_ordering_position(field: &str) -> Option<usize> {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::writer::datum::GenericDatumWriter;
use crate::{error::Details, rabin::Rabin};
use apache_avro_test_helper::{
TestResult,
@@ -2992,7 +2993,9 @@ mod tests {
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = crate::from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -3573,7 +3576,9 @@ mod tests {
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
// Now, attempt to deserialize using the reader schema.
let reader_schema = Schema::parse(&reader_schema)?;
diff --git a/avro/src/serde/de.rs b/avro/src/serde/de.rs
index 18c42a2..ee9dec1 100644
--- a/avro/src/serde/de.rs
+++ b/avro/src/serde/de.rs
@@ -954,6 +954,7 @@ mod tests {
use super::*;
use crate::Decimal;
+ use crate::writer::datum::GenericDatumWriter;
#[derive(PartialEq, Eq, Serialize, Deserialize, Debug)]
pub struct StringEnum {
@@ -989,7 +990,11 @@ mod tests {
// encode into avro
let value = crate::to_value(&data)?;
- let mut buf = std::io::Cursor::new(crate::to_avro_datum(&schema,
value)?);
+ let mut buf = std::io::Cursor::new(
+ GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(value)?,
+ );
// decode from avro
let value = crate::from_avro_datum(&schema, &mut buf, None)?;
@@ -1031,7 +1036,9 @@ mod tests {
let value = crate::to_value(data)?;
// The following statement has to fail as the data is wrong.
- let encoded_data = crate::to_avro_datum(&schema, value);
+ let encoded_data = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(value);
assert!(encoded_data.is_err());
diff --git a/avro/src/types.rs b/avro/src/types.rs
index 90ea2b0..b6f658c 100644
--- a/avro/src/types.rs
+++ b/avro/src/types.rs
@@ -380,10 +380,8 @@ impl Value {
let rs = ResolvedSchema::try_from(schemata.clone())
.expect("Schemata didn't successfully resolve");
let schemata_len = schemata.len();
- schemata.iter().any(|schema| {
- let enclosing_namespace = schema.namespace();
-
- match self.validate_internal(schema, rs.get_names(),
enclosing_namespace) {
+ schemata.iter().any(
+ |schema| match self.validate_internal(schema, rs.get_names(),
None) {
Some(reason) => {
let log_message =
format!("Invalid value: {self:?} for schema:
{schema:?}. Reason: {reason}");
@@ -395,8 +393,8 @@ impl Value {
false
}
None => true,
- }
- })
+ },
+ )
}
fn accumulate(accumulator: Option<String>, other: Option<String>) ->
Option<String> {
diff --git a/avro/src/writer/datum.rs b/avro/src/writer/datum.rs
index 14a40a2..c5eb4a2 100644
--- a/avro/src/writer/datum.rs
+++ b/avro/src/writer/datum.rs
@@ -15,32 +15,157 @@
// specific language governing permissions and limitations
// under the License.
-use std::io::Write;
-
+use bon::bon;
use serde::Serialize;
+use std::io::Write;
use crate::{
AvroResult, Schema,
- encode::{encode, encode_internal},
+ encode::encode_internal,
error::Details,
- schema::{NamesRef, ResolvedOwnedSchema, ResolvedSchema},
+ schema::{NamesRef, ResolvedSchema},
serde::ser_schema::SchemaAwareWriteSerializer,
types::Value,
};
+/// Writer for writing raw Avro data.
+///
+/// This is most likely not what you need. Most users should use
[`Writer`][crate::Writer],
+/// [`GenericSingleObjectWriter`][crate::GenericSingleObjectWriter], or
+/// [`SpecificSingleObjectWriter`][crate::SpecificSingleObjectWriter] instead.
+pub struct GenericDatumWriter<'s> {
+ schema: &'s Schema,
+ resolved: ResolvedSchema<'s>,
+ validate: bool,
+}
+
+#[bon]
+impl<'s> GenericDatumWriter<'s> {
+ /// Configure a new writer.
+ #[builder]
+ pub fn new(
+ /// The schema for the data that will be written
+ #[builder(start_fn)]
+ schema: &'s Schema,
+ /// Already resolved schemata that will be used to resolve references
in the writer's schema.
+ ///
+ /// You can also use [`Self::schemata`] instead.
+ resolved_schemata: Option<ResolvedSchema<'s>>,
+ /// Validate values against the writer schema before writing them.
+ ///
+ /// Defaults to `true`.
+ ///
+ /// Setting this to `false` and writing values that don't match the
schema will make the
+ /// written data unreadable.
+ #[builder(default = true)]
+ validate: bool,
+ ) -> AvroResult<Self> {
+ let resolved = if let Some(resolved) = resolved_schemata {
+ resolved
+ } else {
+ ResolvedSchema::try_from(schema)?
+ };
+ Ok(Self {
+ schema,
+ resolved,
+ validate,
+ })
+ }
+}
+
+impl<'s, S: generic_datum_writer_builder::State> GenericDatumWriterBuilder<'s,
S> {
+ /// Set the schemata that will be used to resolve any references in the
schema.
+ ///
+ /// This is equivalent to
`.resolved_schemata(ResolvedSchema::new_with_schemata(schemata)?)`.
+ /// If you already have a [`ResolvedSchema`], use `resolved_schemata` instead.
+ pub fn schemata(
+ self,
+ schemata: Vec<&'s Schema>,
+ ) -> AvroResult<
+ GenericDatumWriterBuilder<'s,
generic_datum_writer_builder::SetResolvedSchemata<S>>,
+ >
+ where
+ S::ResolvedSchemata: generic_datum_writer_builder::IsUnset,
+ {
+ let resolved = ResolvedSchema::new_with_schemata(schemata)?;
+ Ok(self.resolved_schemata(resolved))
+ }
+}
+
+impl GenericDatumWriter<'_> {
+ /// Write a value to the writer.
+ pub fn write_value<W: Write, V: Into<Value>>(
+ &self,
+ writer: &mut W,
+ value: V,
+ ) -> AvroResult<usize> {
+ let value = value.into();
+ self.write_value_ref(writer, &value)
+ }
+
+ /// Write a value to the writer.
+ pub fn write_value_ref<W: Write>(&self, writer: &mut W, value: &Value) ->
AvroResult<usize> {
+ if self.validate
+ && value
+ .validate_internal(self.schema, self.resolved.get_names(),
None)
+ .is_some()
+ {
+ return Err(Details::Validation.into());
+ }
+ encode_internal(value, self.schema, self.resolved.get_names(), None,
writer)
+ }
+
+ /// Write a value to a [`Vec`].
+ pub fn write_value_to_vec<V: Into<Value>>(&self, value: V) ->
AvroResult<Vec<u8>> {
+ let mut vec = Vec::new();
+ self.write_value(&mut vec, value)?;
+ Ok(vec)
+ }
+
+ /// Serialize `T` to the writer.
+ pub fn write_ser<W: Write, T: Serialize>(
+ &self,
+ writer: &mut W,
+ value: &T,
+ ) -> AvroResult<usize> {
+ let mut serializer =
+ SchemaAwareWriteSerializer::new(writer, self.schema,
self.resolved.get_names(), None);
+ value.serialize(&mut serializer)
+ }
+
+ /// Serialize `T` to a [`Vec`].
+ pub fn write_ser_to_vec<T: Serialize>(&self, value: &T) ->
AvroResult<Vec<u8>> {
+ let mut vec = Vec::new();
+ self.write_ser(&mut vec, value)?;
+ Ok(vec)
+ }
+}
+
+/// Deprecated. Use [`GenericDatumWriter`] instead.
+///
+/// This is equivalent to:
+/// ```ignore
+/// GenericDatumWriter::builder(schema)
+/// .build()?
+/// .write_value_to_vec(value)
+/// ```
+///
/// Encode a value into raw Avro data, also performs schema validation.
///
/// **NOTE**: This function has a quite small niche of usage and does NOT
generate headers and sync
/// markers; use [`Writer`] to be fully Avro-compatible if you don't know what
/// you are doing, instead.
+#[deprecated(since = "0.22.0", note = "Use GenericDatumWriter instead")]
pub fn to_avro_datum<T: Into<Value>>(schema: &Schema, value: T) ->
AvroResult<Vec<u8>> {
- let mut buffer = Vec::new();
- write_avro_datum(schema, value, &mut buffer)?;
- Ok(buffer)
+ GenericDatumWriter::builder(schema)
+ .build()?
+ .write_value_to_vec(value)
}
/// Write the referenced [Serialize]able object to the provided [Write] object.
///
+/// It is recommended to use [`GenericDatumWriter`] instead.
+///
/// Returns a result with the number of bytes written.
///
/// **NOTE**: This function has a quite small niche of usage and does **NOT**
generate headers and sync
@@ -56,84 +181,34 @@ pub fn write_avro_datum_ref<T: Serialize, W: Write>(
data.serialize(&mut serializer)
}
+/// Deprecated. Use [`GenericDatumWriter`] instead.
+///
+/// This is equivalent to:
+/// ```ignore
+/// GenericDatumWriter::builder(schema)
+/// .schemata(schemata)?
+/// .build()?
+/// .write_value_to_vec(value)
+/// ```
+///
/// Encode a value into raw Avro data, also performs schema validation.
///
/// If the provided `schema` is incomplete then its dependencies must be
/// provided in `schemata`
+#[deprecated(since = "0.22.0", note = "Use GenericDatumWriter instead")]
pub fn to_avro_datum_schemata<T: Into<Value>>(
schema: &Schema,
schemata: Vec<&Schema>,
value: T,
) -> AvroResult<Vec<u8>> {
- let mut buffer = Vec::new();
- write_avro_datum_schemata(schema, schemata, value, &mut buffer)?;
- Ok(buffer)
-}
-
-/// Encode a value into raw Avro data, also performs schema validation.
-///
-/// This is an internal function which gets the bytes buffer where to write as
parameter instead of
-/// creating a new one like `to_avro_datum`.
-pub(super) fn write_avro_datum<T: Into<Value>, W: Write>(
- schema: &Schema,
- value: T,
- writer: &mut W,
-) -> AvroResult<()> {
- let avro = value.into();
- if !avro.validate(schema) {
- return Err(Details::Validation.into());
- }
- encode(&avro, schema, writer)?;
- Ok(())
-}
-
-pub(super) fn write_avro_datum_schemata<T: Into<Value>>(
- schema: &Schema,
- schemata: Vec<&Schema>,
- value: T,
- buffer: &mut Vec<u8>,
-) -> AvroResult<usize> {
- let avro = value.into();
- let rs = ResolvedSchema::try_from(schemata)?;
- let names = rs.get_names();
- let enclosing_namespace = schema.namespace();
- if let Some(_err) = avro.validate_internal(schema, names,
enclosing_namespace) {
- return Err(Details::Validation.into());
- }
- encode_internal(&avro, schema, names, enclosing_namespace, buffer)
-}
-
-pub(super) fn write_value_ref_owned_resolved<W: Write>(
- resolved_schema: &ResolvedOwnedSchema,
- value: &Value,
- writer: &mut W,
-) -> AvroResult<usize> {
- let root_schema = resolved_schema.get_root_schema();
- if let Some(reason) = value.validate_internal(
- root_schema,
- resolved_schema.get_names(),
- root_schema.namespace(),
- ) {
- return Err(Details::ValidationWithReason {
- value: value.clone(),
- schema: root_schema.clone(),
- reason,
- }
- .into());
- }
- encode_internal(
- value,
- root_schema,
- resolved_schema.get_names(),
- root_schema.namespace(),
- writer,
- )
+ GenericDatumWriter::builder(schema)
+ .schemata(schemata)?
+ .build()?
+ .write_value_to_vec(value)
}
#[cfg(test)]
mod tests {
- use std::collections::HashMap;
-
use apache_avro_test_helper::TestResult;
use crate::{
@@ -177,7 +252,11 @@ mod tests {
zig_i64(3, &mut expected)?;
expected.extend([b'f', b'o', b'o']);
- assert_eq!(to_avro_datum(&schema, record)?, expected);
+ let written = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(record)?;
+
+ assert_eq!(written, expected);
Ok(())
}
@@ -202,7 +281,9 @@ mod tests {
zig_i64(3, &mut expected)?;
expected.extend([b'f', b'o', b'o']);
- let bytes = write_avro_datum_ref(&schema, &HashMap::new(), &data, &mut
writer)?;
+ let bytes = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_ser(&mut writer, &data)?;
assert_eq!(bytes, expected.len());
assert_eq!(writer, expected);
@@ -219,7 +300,10 @@ mod tests {
zig_i64(1, &mut expected)?;
zig_i64(3, &mut expected)?;
- assert_eq!(to_avro_datum(&schema, union)?, expected);
+ let written = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(union)?;
+ assert_eq!(written, expected);
Ok(())
}
@@ -232,7 +316,10 @@ mod tests {
let mut expected = Vec::new();
zig_i64(0, &mut expected)?;
- assert_eq!(to_avro_datum(&schema, union)?, expected);
+ let written = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(union)?;
+ assert_eq!(written, expected);
Ok(())
}
@@ -249,8 +336,12 @@ mod tests {
let schema = Schema::parse_str(schema_str)?;
assert_eq!(&schema, expected_schema);
// The serialized format should be the same as the schema.
- let ser = to_avro_datum(&schema, value.clone())?;
- let raw_ser = to_avro_datum(raw_schema, raw_value)?;
+ let ser = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(value.clone())?;
+ let raw_ser = GenericDatumWriter::builder(raw_schema)
+ .build()?
+ .write_value_to_vec(raw_value)?;
assert_eq!(ser, raw_ser);
// Should deserialize from the schema into the logical type.
diff --git a/avro/src/writer/single_object.rs b/avro/src/writer/single_object.rs
index 136986a..d5c2c8c 100644
--- a/avro/src/writer/single_object.rs
+++ b/avro/src/writer/single_object.rs
@@ -19,14 +19,14 @@ use std::{io::Write, marker::PhantomData,
ops::RangeInclusive};
use serde::Serialize;
+use crate::encode::encode_internal;
+use crate::serde::ser_schema::SchemaAwareWriteSerializer;
use crate::{
AvroResult, AvroSchema, Schema,
error::Details,
headers::{HeaderBuilder, RabinFingerprintHeader},
schema::ResolvedOwnedSchema,
types::Value,
- write_avro_datum_ref,
- writer::datum::write_value_ref_owned_resolved,
};
/// Writer that encodes messages according to the single object encoding v1
spec
@@ -163,12 +163,13 @@ where
.write_all(&self.header)
.map_err(Details::WriteBytes)?;
- let bytes = write_avro_datum_ref(
+ let mut serializer = SchemaAwareWriteSerializer::new(
+ writer,
self.resolved.get_root_schema(),
self.resolved.get_names(),
- data,
- writer,
- )?;
+ None,
+ );
+ let bytes = data.serialize(&mut serializer)?;
Ok(bytes + self.header.len())
}
@@ -184,6 +185,33 @@ where
}
}
+fn write_value_ref_owned_resolved<W: Write>(
+ resolved_schema: &ResolvedOwnedSchema,
+ value: &Value,
+ writer: &mut W,
+) -> AvroResult<usize> {
+ let root_schema = resolved_schema.get_root_schema();
+ if let Some(reason) = value.validate_internal(
+ root_schema,
+ resolved_schema.get_names(),
+ root_schema.namespace(),
+ ) {
+ return Err(Details::ValidationWithReason {
+ value: value.clone(),
+ schema: root_schema.clone(),
+ reason,
+ }
+ .into());
+ }
+ encode_internal(
+ value,
+ root_schema,
+ resolved_schema.get_names(),
+ root_schema.namespace(),
+ writer,
+ )
+}
+
#[cfg(test)]
mod tests {
use apache_avro_test_helper::TestResult;
diff --git a/avro/tests/avro-3786.rs b/avro/tests/avro-3786.rs
index c509a3f..461de7c 100644
--- a/avro/tests/avro-3786.rs
+++ b/avro/tests/avro-3786.rs
@@ -15,7 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use apache_avro::{Schema, from_avro_datum, to_avro_datum, to_value, types};
+use apache_avro::writer::datum::GenericDatumWriter;
+use apache_avro::{Schema, from_avro_datum, to_value, types};
use apache_avro_test_helper::TestResult;
#[test]
@@ -132,7 +133,9 @@ fn avro_3786_deserialize_union_with_different_enum_order()
-> TestResult {
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -256,7 +259,9 @@ fn
avro_3786_deserialize_union_with_different_enum_order_defined_in_record() ->
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -369,7 +374,9 @@ fn
test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -482,7 +489,9 @@ fn
test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -595,7 +604,9 @@ fn
deserialize_union_with_different_enum_order_defined_in_record() -> TestResult
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -869,7 +880,9 @@ fn
deserialize_union_with_record_with_enum_defined_inline_reader_has_different_i
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
diff --git a/avro/tests/avro-3787.rs b/avro/tests/avro-3787.rs
index 192c573..03d6158 100644
--- a/avro/tests/avro-3787.rs
+++ b/avro/tests/avro-3787.rs
@@ -15,7 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use apache_avro::{Schema, from_avro_datum, to_avro_datum, to_value, types};
+use apache_avro::writer::datum::GenericDatumWriter;
+use apache_avro::{Schema, from_avro_datum, to_value, types};
use apache_avro_test_helper::TestResult;
#[test]
@@ -133,7 +134,9 @@ fn avro_3787_deserialize_union_with_unknown_symbol() ->
TestResult {
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
@@ -264,7 +267,9 @@ fn avro_3787_deserialize_union_with_unknown_symbol_no_ref()
-> TestResult {
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
diff --git a/avro/tests/io.rs b/avro/tests/io.rs
index dba7eef..ae51a79 100644
--- a/avro/tests/io.rs
+++ b/avro/tests/io.rs
@@ -16,7 +16,8 @@
// under the License.
//! Port of
<https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py>
-use apache_avro::{Error, Schema, error::Details, from_avro_datum,
to_avro_datum, types::Value};
+use apache_avro::writer::datum::GenericDatumWriter;
+use apache_avro::{Error, Schema, error::Details, from_avro_datum,
types::Value};
use apache_avro_test_helper::TestResult;
use pretty_assertions::assert_eq;
use std::{io::Cursor, sync::OnceLock};
@@ -231,7 +232,9 @@ fn test_validate() -> TestResult {
fn test_round_trip() -> TestResult {
for (raw_schema, value) in schemas_to_validate().iter() {
let schema = Schema::parse_str(raw_schema)?;
- let encoded = to_avro_datum(&schema, value.clone()).unwrap();
+ let encoded = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(value.clone())?;
let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded),
None).unwrap();
assert_eq!(value, &decoded);
}
@@ -242,7 +245,9 @@ fn test_round_trip() -> TestResult {
#[test]
fn test_binary_int_encoding() -> TestResult {
for (number, hex_encoding) in binary_encodings().iter() {
- let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32))?;
+ let encoded = GenericDatumWriter::builder(&Schema::Int)
+ .build()?
+ .write_value_to_vec(Value::Int(*number as i32))?;
assert_eq!(&encoded, hex_encoding);
}
@@ -252,7 +257,9 @@ fn test_binary_int_encoding() -> TestResult {
#[test]
fn test_binary_long_encoding() -> TestResult {
for (number, hex_encoding) in binary_encodings().iter() {
- let encoded = to_avro_datum(&Schema::Long, Value::Long(*number))?;
+ let encoded = GenericDatumWriter::builder(&Schema::Long)
+ .build()?
+ .write_value_to_vec(Value::Long(*number))?;
assert_eq!(&encoded, hex_encoding);
}
@@ -275,7 +282,9 @@ fn test_schema_promotion() -> TestResult {
let original_value = &promotable_values[i];
for (j, reader_raw_schema) in
promotable_schemas.iter().enumerate().skip(i + 1) {
let reader_schema = Schema::parse_str(reader_raw_schema)?;
- let encoded = to_avro_datum(&writer_schema,
original_value.clone())?;
+ let encoded = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(original_value.clone())?;
let decoded = from_avro_datum(
&writer_schema,
&mut Cursor::new(encoded),
@@ -298,7 +307,9 @@ fn test_unknown_symbol() -> TestResult {
let reader_schema =
Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols":
["BAR", "BAZ"]}"#)?;
let original_value = Value::Enum(0, "FOO".to_string());
- let encoded = to_avro_datum(&writer_schema, original_value)?;
+ let encoded = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(original_value.clone())?;
let decoded = from_avro_datum(
&writer_schema,
&mut Cursor::new(encoded),
@@ -322,7 +333,9 @@ fn test_default_value() -> TestResult {
}}"#
))?;
let datum_to_read = Value::Record(vec![("H".to_string(),
default_datum.clone())]);
- let encoded = to_avro_datum(long_record_schema(),
long_record_datum().clone())?;
+ let encoded = GenericDatumWriter::builder(long_record_schema())
+ .build()?
+ .write_value_to_vec(long_record_datum().clone())?;
let datum_read = from_avro_datum(
long_record_schema(),
&mut Cursor::new(encoded),
@@ -379,7 +392,9 @@ fn test_no_default_value() -> TestResult {
]
}"#,
)?;
- let encoded = to_avro_datum(long_record_schema(),
long_record_datum().clone())?;
+ let encoded = GenericDatumWriter::builder(long_record_schema())
+ .build()?
+ .write_value_to_vec(long_record_datum().clone())?;
let result = from_avro_datum(
long_record_schema(),
&mut Cursor::new(encoded),
@@ -408,7 +423,9 @@ fn test_projection() -> TestResult {
("E".to_string(), Value::Int(5)),
("F".to_string(), Value::Int(6)),
]);
- let encoded = to_avro_datum(long_record_schema(),
long_record_datum().clone())?;
+ let encoded = GenericDatumWriter::builder(long_record_schema())
+ .build()?
+ .write_value_to_vec(long_record_datum().clone())?;
let datum_read = from_avro_datum(
long_record_schema(),
&mut Cursor::new(encoded),
@@ -437,7 +454,9 @@ fn test_field_order() -> TestResult {
("F".to_string(), Value::Int(6)),
("E".to_string(), Value::Int(5)),
]);
- let encoded = to_avro_datum(long_record_schema(),
long_record_datum().clone())?;
+ let encoded = GenericDatumWriter::builder(long_record_schema())
+ .build()?
+ .write_value_to_vec(long_record_datum().clone())?;
let datum_read = from_avro_datum(
long_record_schema(),
&mut Cursor::new(encoded),
@@ -467,7 +486,11 @@ fn test_type_exception() -> Result<(), String> {
("E".to_string(), Value::Int(5)),
("F".to_string(), Value::String(String::from("Bad"))),
]);
- let encoded = to_avro_datum(&writer_schema,
datum_to_write).map_err(Error::into_details);
+ let encoded = GenericDatumWriter::builder(&writer_schema)
+ .build()
+ .unwrap()
+ .write_value_to_vec(datum_to_write)
+ .map_err(Error::into_details);
match encoded {
Ok(_) => Err(String::from("Expected ValidationError, got Ok")),
Err(Details::Validation) => Ok(()),
diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs
index 3347e83..3542657 100644
--- a/avro/tests/schema.rs
+++ b/avro/tests/schema.rs
@@ -15,17 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-use std::{
- collections::HashMap,
- io::{Cursor, Read},
-};
-
+use apache_avro::writer::datum::GenericDatumWriter;
use apache_avro::{
Codec, Error, Reader, Schema, Writer,
error::Details,
from_avro_datum, from_value,
schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema},
- to_avro_datum, to_value,
+ to_value,
types::{Record, Value},
};
use apache_avro_test_helper::{
@@ -33,6 +29,11 @@ use apache_avro_test_helper::{
data::{DOC_EXAMPLES, OTHER_ATTRIBUTES_EXAMPLES, examples, valid_examples},
init,
};
+use serde::{Deserialize, Serialize};
+use std::{
+ collections::HashMap,
+ io::{Cursor, Read},
+};
#[test]
fn test_correct_recursive_extraction() -> TestResult {
@@ -857,7 +858,9 @@ fn avro_old_issue_47() -> TestResult {
};
let ser_value = to_value(record.clone())?;
- let serialized_bytes = to_avro_datum(&schema, ser_value)?;
+ let serialized_bytes = GenericDatumWriter::builder(&schema)
+ .build()?
+ .write_value_to_vec(ser_value)?;
let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?;
let deserialized_record = from_value::<MyRecord>(de_value)?;
@@ -869,9 +872,6 @@ fn avro_old_issue_47() -> TestResult {
#[test]
fn
test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_type()
-> TestResult
{
- use apache_avro::{from_avro_datum, to_avro_datum, types::Value};
- use serde::{Deserialize, Serialize};
-
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
pub struct BarUseParent {
#[serde(rename = "barUse")]
@@ -978,12 +978,14 @@ fn
test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_
bar_init: Bar::Bar0,
bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }),
};
- let avro_value = crate::to_value(foo1)?;
+ let avro_value = to_value(foo1)?;
assert!(
avro_value.validate(&writer_schema),
"value is valid for schema",
);
- let datum = to_avro_datum(&writer_schema, avro_value)?;
+ let datum = GenericDatumWriter::builder(&writer_schema)
+ .build()?
+ .write_value_to_vec(avro_value)?;
let mut x = &datum[..];
let reader_schema = Schema::parse_str(reader_schema)?;
let deser_value = from_avro_datum(&writer_schema, &mut x,
Some(&reader_schema))?;
diff --git a/avro/tests/to_from_avro_datum_schemata.rs
b/avro/tests/to_from_avro_datum_schemata.rs
index 41c0812..5cc7973 100644
--- a/avro/tests/to_from_avro_datum_schemata.rs
+++ b/avro/tests/to_from_avro_datum_schemata.rs
@@ -15,9 +15,10 @@
// specific language governing permissions and limitations
// under the License.
+use apache_avro::writer::datum::GenericDatumWriter;
use apache_avro::{
Codec, Reader, Schema, Writer, from_avro_datum_reader_schemata,
from_avro_datum_schemata,
- to_avro_datum_schemata, types::Value,
+ types::Value,
};
use apache_avro_test_helper::{TestResult, init};
@@ -52,7 +53,10 @@ fn test_avro_3683_multiple_schemata_to_from_avro_datum() ->
TestResult {
// this is the Schema we want to use for write/read
let schema_b = schemata[1];
let expected: Vec<u8> = vec![0, 0, 128, 63];
- let actual = to_avro_datum_schemata(schema_b, schemata.clone(),
record.clone())?;
+ let actual = GenericDatumWriter::builder(schema_b)
+ .schemata(schemata.clone())?
+ .build()?
+ .write_value_to_vec(record.clone())?;
assert_eq!(actual, expected);
let value = from_avro_datum_schemata(schema_b, schemata, &mut
actual.as_slice(), None)?;
@@ -76,7 +80,10 @@ fn
avro_rs_106_test_multiple_schemata_to_from_avro_datum_with_resolution() -> Te
// this is the Schema we want to use for write/read
let schema_b = schemata[1];
let expected: Vec<u8> = vec![0, 0, 128, 63];
- let actual = to_avro_datum_schemata(schema_b, schemata.clone(),
record.clone())?;
+ let actual = GenericDatumWriter::builder(schema_b)
+ .schemata(schemata.clone())?
+ .build()?
+ .write_value_to_vec(record.clone())?;
assert_eq!(actual, expected);
let value = from_avro_datum_reader_schemata(