This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 33d97ba2 Add UUID support for the Avro schema (#1706)
33d97ba2 is described below

commit 33d97ba2906782ccd11178ae62a469d2049a4e8f
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Wed Sep 24 04:00:43 2025 +0200

    Add UUID support for the Avro schema (#1706)
    
    ## Which issue does this PR close?
    
    `Fixed[16]` encoded UUIDs are now supported in Avro-rs:
    https://github.com/apache/avro-rs/pull/255
    
    With this change, UUIDs encoded as either `string` or `fixed[16]` are
    handled automatically, provided the `logicalType` annotation is set to
    `uuid` 👍
    
    ## What changes are included in this PR?
    
    <!--
    Provide a summary of the modifications in this PR. List the main changes
    such as new features, bug fixes, refactoring, or any other updates.
    -->
    
    ## Are these changes tested?
    
    <!--
    Specify what test covers (unit test, integration test, etc.).
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
---
 crates/iceberg/src/avro/schema.rs | 65 ++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs
index b08a6730..fdbc6809 100644
--- a/crates/iceberg/src/avro/schema.rs
+++ b/crates/iceberg/src/avro/schema.rs
@@ -36,10 +36,7 @@ const ELEMENT_ID: &str = "element-id";
 const FIELD_ID_PROP: &str = "field-id";
 const KEY_ID: &str = "key-id";
 const VALUE_ID: &str = "value-id";
-const UUID_BYTES: usize = 16;
-const UUID_LOGICAL_TYPE: &str = "uuid";
 const MAP_LOGICAL_TYPE: &str = "map";
-// # TODO: https://github.com/apache/iceberg-rust/issues/86
 // This const may better to maintain in avro-rs.
 const LOGICAL_TYPE: &str = "logicalType";
 
@@ -237,8 +234,8 @@ impl SchemaVisitor for SchemaToAvroSchema {
             PrimitiveType::TimestampNs => AvroSchema::TimestampNanos,
             PrimitiveType::TimestamptzNs => AvroSchema::TimestampNanos,
             PrimitiveType::String => AvroSchema::String,
-            PrimitiveType::Uuid => avro_fixed_schema(UUID_BYTES, 
Some(UUID_LOGICAL_TYPE))?,
-            PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize, 
None)?,
+            PrimitiveType::Uuid => AvroSchema::Uuid,
+            PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?,
             PrimitiveType::Binary => AvroSchema::Bytes,
             PrimitiveType::Decimal { precision, scale } => {
                 avro_decimal_schema(*precision as usize, *scale as usize)?
@@ -274,21 +271,13 @@ fn avro_record_schema(name: &str, fields: Vec<AvroRecordField>) -> Result<AvroSc
     }))
 }
 
-pub(crate) fn avro_fixed_schema(len: usize, logical_type: Option<&str>) -> 
Result<AvroSchema> {
-    let attributes = if let Some(logical_type) = logical_type {
-        BTreeMap::from([(
-            LOGICAL_TYPE.to_string(),
-            Value::String(logical_type.to_string()),
-        )])
-    } else {
-        Default::default()
-    };
+pub(crate) fn avro_fixed_schema(len: usize) -> Result<AvroSchema> {
     Ok(AvroSchema::Fixed(FixedSchema {
         name: Name::new(format!("fixed_{len}").as_str())?,
         aliases: None,
         doc: None,
         size: len,
-        attributes,
+        attributes: Default::default(),
         default: None,
     }))
 }
@@ -533,30 +522,9 @@ impl AvroSchemaVisitor for AvroSchemaToSchema {
             AvroSchema::Long => Type::Primitive(PrimitiveType::Long),
             AvroSchema::Float => Type::Primitive(PrimitiveType::Float),
             AvroSchema::Double => Type::Primitive(PrimitiveType::Double),
+            AvroSchema::Uuid => Type::Primitive(PrimitiveType::Uuid),
             AvroSchema::String | AvroSchema::Enum(_) => 
Type::Primitive(PrimitiveType::String),
-            AvroSchema::Fixed(fixed) => {
-                if let Some(logical_type) = fixed.attributes.get(LOGICAL_TYPE) 
{
-                    let logical_type = logical_type.as_str().ok_or_else(|| {
-                        Error::new(
-                            ErrorKind::DataInvalid,
-                            "logicalType in attributes of avro schema is not a 
string type",
-                        )
-                    })?;
-                    match logical_type {
-                        UUID_LOGICAL_TYPE => 
Type::Primitive(PrimitiveType::Uuid),
-                        ty => {
-                            return Err(Error::new(
-                                ErrorKind::FeatureUnsupported,
-                                format!(
-                                    "Logical type {ty} is not support in 
iceberg primitive type.",
-                                ),
-                            ));
-                        }
-                    }
-                } else {
-                    Type::Primitive(PrimitiveType::Fixed(fixed.size as u64))
-                }
-            }
+            AvroSchema::Fixed(fixed) => 
Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)),
             AvroSchema::Bytes => Type::Primitive(PrimitiveType::Binary),
             AvroSchema::Null => return Ok(None),
             _ => {
@@ -1223,4 +1191,25 @@ mod tests {
             converter.primitive(&AvroSchema::Date).unwrap().unwrap()
         );
     }
+
+    #[test]
+    fn test_uuid_type() {
+        let avro_schema = {
+            AvroSchema::parse_str(
+                r#"
+            {"name": "test", "type": "fixed", "size": 16, "logicalType": 
"uuid"}
+            "#,
+            )
+            .unwrap()
+        };
+
+        let mut converter = AvroSchemaToSchema;
+
+        let iceberg_type = Type::from(PrimitiveType::Uuid);
+
+        assert_eq!(
+            iceberg_type,
+            converter.primitive(&avro_schema).unwrap().unwrap()
+        );
+    }
 }

Reply via email to