This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push: new 33d97ba2 Add UUID support for the Avro schema (#1706) 33d97ba2 is described below commit 33d97ba2906782ccd11178ae62a469d2049a4e8f Author: Fokko Driesprong <fo...@apache.org> AuthorDate: Wed Sep 24 04:00:43 2025 +0200 Add UUID support for the Avro schema (#1706) ## Which issue does this PR close? `Fixed[16]` encoded UUIDs are now supported in Avro-rs: https://github.com/apache/avro-rs/pull/255 This will automatically support String and Fixed[16] logical types if the `logicalType` annotation is set correctly 👍 ## What changes are included in this PR? <!-- Provide a summary of the modifications in this PR. List the main changes such as new features, bug fixes, refactoring, or any other updates. --> ## Are these changes tested? <!-- Specify what test covers (unit test, integration test, etc.). If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> --- crates/iceberg/src/avro/schema.rs | 65 ++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index b08a6730..fdbc6809 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -36,10 +36,7 @@ const ELEMENT_ID: &str = "element-id"; const FIELD_ID_PROP: &str = "field-id"; const KEY_ID: &str = "key-id"; const VALUE_ID: &str = "value-id"; -const UUID_BYTES: usize = 16; -const UUID_LOGICAL_TYPE: &str = "uuid"; const MAP_LOGICAL_TYPE: &str = "map"; -// # TODO: https://github.com/apache/iceberg-rust/issues/86 // This const may better to maintain in avro-rs. 
const LOGICAL_TYPE: &str = "logicalType"; @@ -237,8 +234,8 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::TimestampNs => AvroSchema::TimestampNanos, PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos, PrimitiveType::String => AvroSchema::String, - PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES, Some(UUID_LOGICAL_TYPE))?, - PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize, None)?, + PrimitiveType::Uuid => AvroSchema::Uuid, + PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?, PrimitiveType::Binary => AvroSchema::Bytes, PrimitiveType::Decimal { precision, scale } => { avro_decimal_schema(*precision as usize, *scale as usize)? @@ -274,21 +271,13 @@ fn avro_record_schema(name: &str, fields: Vec<AvroRecordField>) -> Result<AvroSc })) } -pub(crate) fn avro_fixed_schema(len: usize, logical_type: Option<&str>) -> Result<AvroSchema> { - let attributes = if let Some(logical_type) = logical_type { - BTreeMap::from([( - LOGICAL_TYPE.to_string(), - Value::String(logical_type.to_string()), - )]) - } else { - Default::default() - }; +pub(crate) fn avro_fixed_schema(len: usize) -> Result<AvroSchema> { Ok(AvroSchema::Fixed(FixedSchema { name: Name::new(format!("fixed_{len}").as_str())?, aliases: None, doc: None, size: len, - attributes, + attributes: Default::default(), default: None, })) } @@ -533,30 +522,9 @@ impl AvroSchemaVisitor for AvroSchemaToSchema { AvroSchema::Long => Type::Primitive(PrimitiveType::Long), AvroSchema::Float => Type::Primitive(PrimitiveType::Float), AvroSchema::Double => Type::Primitive(PrimitiveType::Double), + AvroSchema::Uuid => Type::Primitive(PrimitiveType::Uuid), AvroSchema::String | AvroSchema::Enum(_) => Type::Primitive(PrimitiveType::String), - AvroSchema::Fixed(fixed) => { - if let Some(logical_type) = fixed.attributes.get(LOGICAL_TYPE) { - let logical_type = logical_type.as_str().ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "logicalType in attributes of avro schema is not a 
string type", - ) - })?; - match logical_type { - UUID_LOGICAL_TYPE => Type::Primitive(PrimitiveType::Uuid), - ty => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - format!( - "Logical type {ty} is not support in iceberg primitive type.", - ), - )); - } - } - } else { - Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)) - } - } + AvroSchema::Fixed(fixed) => Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)), AvroSchema::Bytes => Type::Primitive(PrimitiveType::Binary), AvroSchema::Null => return Ok(None), _ => { @@ -1223,4 +1191,25 @@ mod tests { converter.primitive(&AvroSchema::Date).unwrap().unwrap() ); } + + #[test] + fn test_uuid_type() { + let avro_schema = { + AvroSchema::parse_str( + r#" + {"name": "test", "type": "fixed", "size": 16, "logicalType": "uuid"} + "#, + ) + .unwrap() + }; + + let mut converter = AvroSchemaToSchema; + + let iceberg_type = Type::from(PrimitiveType::Uuid); + + assert_eq!( + iceberg_type, + converter.primitive(&avro_schema).unwrap().unwrap() + ); + } }