This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 12e12e27 feat: expose arrow type <-> iceberg type (#637)
12e12e27 is described below

commit 12e12e274571d7aa75c023f6bcd5164984041881
Author: xxchan <[email protected]>
AuthorDate: Fri Sep 20 19:59:55 2024 +0800

    feat: expose arrow type <-> iceberg type (#637)
    
    * feat: expose arrow type <-> iceberg type
    
    Previously we only exposed the schema conversion.
    
    Signed-off-by: xxchan <[email protected]>
    
    * add tests
    
    Signed-off-by: xxchan <[email protected]>
    
    ---------
    
    Signed-off-by: xxchan <[email protected]>
---
 crates/iceberg/src/arrow/schema.rs | 108 ++++++++++++++++++++++++++++++++++---
 1 file changed, 102 insertions(+), 6 deletions(-)

diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index 2ff43e0f..08600664 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -171,7 +171,6 @@ fn visit_type<V: ArrowSchemaVisitor>(r#type: &DataType, visitor: &mut V) -> Resu
 }
 
 /// Visit list types in post order.
-#[allow(dead_code)]
 fn visit_list<V: ArrowSchemaVisitor>(
     data_type: &DataType,
     element_field: &Field,
@@ -184,7 +183,6 @@ fn visit_list<V: ArrowSchemaVisitor>(
 }
 
 /// Visit struct type in post order.
-#[allow(dead_code)]
 fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Result<V::T> {
     let mut results = Vec::with_capacity(fields.len());
     for field in fields {
@@ -198,7 +196,6 @@ fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Resu
 }
 
 /// Visit schema in post order.
-#[allow(dead_code)]
 fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) -> Result<V::U> {
     let mut results = Vec::with_capacity(schema.fields().len());
     for field in schema.fields() {
@@ -211,12 +208,17 @@ fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) ->
 }
 
 /// Convert Arrow schema to ceberg schema.
-#[allow(dead_code)]
 pub fn arrow_schema_to_schema(schema: &ArrowSchema) -> Result<Schema> {
     let mut visitor = ArrowSchemaConverter::new();
     visit_schema(schema, &mut visitor)
 }
 
+/// Convert Arrow type to iceberg type.
+pub fn arrow_type_to_type(ty: &DataType) -> Result<Type> {
+    let mut visitor = ArrowSchemaConverter::new();
+    visit_type(ty, &mut visitor)
+}
+
 const ARROW_FIELD_DOC_KEY: &str = "doc";
 
 fn get_field_id(field: &Field) -> Result<i32> {
@@ -246,7 +248,6 @@ fn get_field_doc(field: &Field) -> Option<String> {
 struct ArrowSchemaConverter;
 
 impl ArrowSchemaConverter {
-    #[allow(dead_code)]
     fn new() -> Self {
         Self {}
     }
@@ -615,6 +616,15 @@ pub fn schema_to_arrow_schema(schema: &crate::spec::Schema) -> crate::Result<Arr
     }
 }
 
+/// Convert iceberg type to an arrow type.
+pub fn type_to_arrow_type(ty: &crate::spec::Type) -> crate::Result<DataType> {
+    let mut converter = ToArrowSchemaConverter;
+    match crate::spec::visit_type(ty, &mut converter)? {
+        ArrowSchemaOrFieldOrType::Type(ty) => Ok(ty),
+        _ => unreachable!(),
+    }
+}
+
 /// Convert Iceberg Datum to Arrow Datum.
 pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Box<dyn ArrowDatum + Send>> {
     match (datum.data_type(), datum.literal()) {
@@ -779,7 +789,7 @@ mod tests {
     use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
 
     use super::*;
-    use crate::spec::Schema;
+    use crate::spec::{Literal, Schema};
 
     /// Create a simple field with metadata.
     fn simple_field(name: &str, ty: DataType, nullable: bool, value: &str) -> Field {
@@ -1365,4 +1375,90 @@ mod tests {
         let converted_arrow_schema = schema_to_arrow_schema(&schema).unwrap();
         assert_eq!(converted_arrow_schema, arrow_schema);
     }
+
+    #[test]
+    fn test_type_conversion() {
+        // test primitive type
+        {
+            let arrow_type = DataType::Int32;
+            let iceberg_type = Type::Primitive(PrimitiveType::Int);
+            assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
+            assert_eq!(iceberg_type, arrow_type_to_type(&arrow_type).unwrap());
+        }
+
+        // test struct type
+        {
+            // no metadata will cause error
+            let arrow_type = DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int64, false),
+                Field::new("b", DataType::Utf8, true),
+            ]));
+            assert_eq!(
+                &arrow_type_to_type(&arrow_type).unwrap_err().to_string(),
+                "DataInvalid => Field id not found in metadata"
+            );
+
+            let arrow_type = DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int64, false).with_metadata(HashMap::from_iter([(
+                    PARQUET_FIELD_ID_META_KEY.to_string(),
+                    1.to_string(),
+                )])),
+                Field::new("b", DataType::Utf8, true).with_metadata(HashMap::from_iter([(
+                    PARQUET_FIELD_ID_META_KEY.to_string(),
+                    2.to_string(),
+                )])),
+            ]));
+            let iceberg_type = Type::Struct(StructType::new(vec![
+                NestedField {
+                    id: 1,
+                    doc: None,
+                    name: "a".to_string(),
+                    required: true,
+                    field_type: Box::new(Type::Primitive(PrimitiveType::Long)),
+                    initial_default: None,
+                    write_default: None,
+                }
+                .into(),
+                NestedField {
+                    id: 2,
+                    doc: None,
+                    name: "b".to_string(),
+                    required: false,
+                    field_type: Box::new(Type::Primitive(PrimitiveType::String)),
+                    initial_default: None,
+                    write_default: None,
+                }
+                .into(),
+            ]));
+            assert_eq!(iceberg_type, arrow_type_to_type(&arrow_type).unwrap());
+            assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
+
+            // initial_default and write_default is ignored
+            let iceberg_type = Type::Struct(StructType::new(vec![
+                NestedField {
+                    id: 1,
+                    doc: None,
+                    name: "a".to_string(),
+                    required: true,
+                    field_type: Box::new(Type::Primitive(PrimitiveType::Long)),
+                    initial_default: Some(Literal::Primitive(PrimitiveLiteral::Int(114514))),
+                    write_default: None,
+                }
+                .into(),
+                NestedField {
+                    id: 2,
+                    doc: None,
+                    name: "b".to_string(),
+                    required: false,
+                    field_type: Box::new(Type::Primitive(PrimitiveType::String)),
+                    initial_default: None,
+                    write_default: Some(Literal::Primitive(PrimitiveLiteral::String(
+                        "514".to_string(),
+                    ))),
+                }
+                .into(),
+            ]));
+            assert_eq!(arrow_type, type_to_arrow_type(&iceberg_type).unwrap());
+        }
+    }
 }

Reply via email to