This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch avro-3926-allow-serde-uuid-to-fixed-16 in repository https://gitbox.apache.org/repos/asf/avro.git
commit 3b2982401b91532f0691ead58d71002ea85c6480 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Thu Jan 4 22:07:23 2024 +0200 AVRO-3926: [Rust] Allow UUID to serialize to Fixed[16] Signed-off-by: Martin Tzvetanov Grigorov <[email protected]> --- lang/rust/avro/src/decode.rs | 103 +++++++++++++++++++++++++++++++++++++++---- lang/rust/avro/src/encode.rs | 27 +++++++++--- lang/rust/avro/src/error.rs | 9 ++++ lang/rust/avro/src/schema.rs | 60 ++++++++++++++++++++----- 4 files changed, 173 insertions(+), 26 deletions(-) diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs index bf8477fb7..3171a9426 100644 --- a/lang/rust/avro/src/decode.rs +++ b/lang/rust/avro/src/decode.rs @@ -19,6 +19,7 @@ use crate::{ bigdecimal::deserialize_big_decimal, decimal::Decimal, duration::Duration, + encode::encode_long, schema::{ DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, Schema, @@ -121,15 +122,60 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( value => Err(Error::BytesValue(value.into())), } } - Schema::Uuid => Ok(Value::Uuid( - Uuid::from_str( - match decode_internal(&Schema::String, names, enclosing_namespace, reader)? { - Value::String(ref s) => s, - value => return Err(Error::GetUuidFromStringValue(value.into())), - }, - ) - .map_err(Error::ConvertStrToUuid)?, - )), + Schema::Uuid => { + let len = decode_len(reader)?; + let mut bytes = vec![0u8; len]; + reader.read_exact(&mut bytes).map_err(Error::ReadIntoBuf)?; + + // use a Vec to be able re-read the bytes more than once if needed + let mut reader = Vec::with_capacity(len + 1); + encode_long(len as i64, &mut reader); + reader.extend_from_slice(&bytes); + + let decode_from_string = |reader| match decode_internal( + &Schema::String, + names, + enclosing_namespace, + reader, + )? { + Value::String(ref s) => Uuid::from_str(s).map_err(Error::ConvertStrToUuid), + value => Err(Error::GetUuidFromStringValue(value.into())), + }; + + let uuid: Uuid = if len == 16 { + // most probably a Fixed schema + let fixed_result = decode_internal( + &Schema::Fixed(FixedSchema { + size: 16, + name: "uuid".into(), + aliases: None, + doc: None, + attributes: Default::default(), + }), + names, + enclosing_namespace, + &mut bytes.as_slice(), + ); + if fixed_result.is_ok() { + match fixed_result? { + Value::Fixed(ref size, ref bytes) => { + if *size != 16 { + return Err(Error::ConvertFixedToUuid(*size)); + } + Uuid::from_slice(bytes).map_err(Error::ConvertSliceToUuid)? + } + _ => decode_from_string(&mut reader.as_slice())?, + } + } else { + // try to decode as string + decode_from_string(&mut reader.as_slice())? + } + } else { + // definitely a string + decode_from_string(&mut reader.as_slice())? + }; + Ok(Value::Uuid(uuid)) + } Schema::Int => decode_int(reader), Schema::Date => zag_i32(reader).map(Value::Date), Schema::TimeMillis => zag_i32(reader).map(Value::TimeMillis), @@ -317,6 +363,7 @@ mod tests { use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::collections::HashMap; + use uuid::Uuid; #[test] fn test_decode_array_without_size() -> TestResult { @@ -815,4 +862,42 @@ mod tests { Ok(()) } + + #[test] + fn avro_3926_encode_decode_uuid_to_string() -> TestResult { + use crate::encode::encode; + + let schema = Schema::String; + let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); + + let mut buffer = Vec::new(); + encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); + + let result = decode(&Schema::Uuid, &mut &buffer[..])?; + assert_eq!(result, value); + + Ok(()) + } + + #[test] + fn avro_3926_encode_decode_uuid_to_fixed() -> TestResult { + use crate::encode::encode; + + let schema = Schema::Fixed(FixedSchema { + size: 16, + name: "uuid".into(), + aliases: None, + doc: None, + attributes: Default::default(), + }); + let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); + + let mut buffer = Vec::new(); + encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); + + let result = decode(&Schema::Uuid, &mut &buffer[..])?; + assert_eq!(result, value); + + Ok(()) + } } diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs index 23f94664c..390b09164 100644 --- a/lang/rust/avro/src/encode.rs +++ b/lang/rust/avro/src/encode.rs @@ -125,12 +125,27 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>( let slice: [u8; 12] = duration.into(); buffer.extend_from_slice(&slice); } - Value::Uuid(uuid) => encode_bytes( - // we need the call .to_string() to properly convert ASCII to UTF-8 - #[allow(clippy::unnecessary_to_owned)] - &uuid.to_string(), - buffer, - ), + Value::Uuid(uuid) => match *schema { + Schema::String => encode_bytes( + // we need the call .to_string() to properly convert ASCII to UTF-8 + #[allow(clippy::unnecessary_to_owned)] + &uuid.to_string(), + buffer, + ), + Schema::Fixed(FixedSchema { size, .. }) => { + let bytes = uuid.as_bytes(); + if bytes.len() != size { + return Err(Error::EncodeUuidAsFixedError(bytes.len(), size)); + } + encode_bytes(bytes, buffer) + } + _ => { + return Err(Error::EncodeValueAsSchemaError { + value_kind: ValueKind::Uuid, + supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed], + }); + } + }, Value::BigDecimal(bg) => { let buf: Vec<u8> = serialize_big_decimal(bg); buffer.extend_from_slice(buf.as_slice()); diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs index 36c8d94a9..bf7a9574c 100644 --- a/lang/rust/avro/src/error.rs +++ b/lang/rust/avro/src/error.rs @@ -89,6 +89,15 @@ pub enum Error { #[error("Failed to convert &str to UUID")] ConvertStrToUuid(#[source] uuid::Error), + #[error("Failed to convert Fixed bytes to UUID. It must be exactly 16 bytes, got {0}")] + ConvertFixedToUuid(usize), + + #[error("Failed to convert Fixed bytes to UUID")] + ConvertSliceToUuid(#[source] uuid::Error), + + #[error("Failed to encode Fixed bytes to UUID: {0} -> {1}")] + EncodeUuidAsFixedError(usize, usize), + #[error("Map key is not a string; key type is {0:?}")] MapKeyType(ValueKind), diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index f4c063df6..15cb86734 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -1404,8 +1404,24 @@ impl Parser { return try_convert_to_logical_type( "uuid", parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::String], - |_| -> AvroResult<Schema> { Ok(Schema::Uuid) }, + &[SchemaKind::String, SchemaKind::Fixed], + |schema| match schema { + Schema::String => Ok(Schema::Uuid), + Schema::Fixed(FixedSchema { size, .. }) if size == 16 => { + Ok(Schema::Uuid) + } + Schema::Fixed(FixedSchema { size, .. }) => { + warn!("Ignoring uuid logical type for a Fixed schema because its size ({size:?}) is not 16! Schema: {:?}", schema); + Ok(schema) + } + _ => { + warn!( + "Ignoring invalid uuid logical type for schema: {:?}", + schema + ); + Ok(schema) + } + }, ); } "date" => { @@ -2367,6 +2383,7 @@ pub mod derive { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::logger::{assert_logged, assert_not_logged}; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde_json::json; @@ -6299,7 +6316,7 @@ mod tests { } #[test] - fn test_avro_3896_uuid_schema() -> TestResult { + fn avro_3896_uuid_schema_for_string() -> TestResult { // string uuid, represents as native logical type. let schema = json!( { @@ -6310,8 +6327,11 @@ mod tests { let parse_result = Schema::parse(&schema)?; assert_eq!(parse_result, Schema::Uuid); - // uuid logical type is not supported for SchemaKind::Fixed, so it is parsed as Schema::Fixed - // and the `logicalType` is preserved as an attribute. + Ok(()) + } + + #[test] + fn avro_3926_uuid_schema_for_fixed_with_size_16() -> TestResult { let schema = json!( { "type": "fixed", @@ -6320,19 +6340,37 @@ mod tests { "logicalType": "uuid" }); let parse_result = Schema::parse(&schema)?; + assert_eq!(parse_result, Schema::Uuid); + assert_not_logged( + r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#, + ); + + Ok(()) + } + + #[test] + fn avro_3926_uuid_schema_for_fixed_with_size_different_than_16() -> TestResult { + let schema = json!( + { + "type": "fixed", + "name": "FixedUUID", + "size": 6, + "logicalType": "uuid" + }); + let parse_result = Schema::parse(&schema)?; assert_eq!( parse_result, Schema::Fixed(FixedSchema { name: Name::new("FixedUUID")?, - doc: None, aliases: None, - size: 16, - attributes: BTreeMap::from([( - "logicalType".to_string(), - Value::String(String::from("uuid")), - )]), + doc: None, + size: 6, + attributes: BTreeMap::from([("logicalType".to_string(), "uuid".into())]), }) ); + assert_logged( + r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#, + ); Ok(()) }
