This is an automated email from the ASF dual-hosted git repository.

mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 8078e099d feat: cherry-pick UUID conversion logic from #2528. (#2648)
8078e099d is described below

commit 8078e099d41293b05d3d11de72c088526b42ee3a
Author: Matt Butrovich <[email protected]>
AuthorDate: Sat Oct 25 10:39:03 2025 -0400

    feat: cherry-pick UUID conversion logic from #2528. (#2648)
---
 native/Cargo.lock                          |  1 +
 native/core/Cargo.toml                     |  1 +
 native/core/src/parquet/parquet_support.rs | 24 +++++++++++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/native/Cargo.lock b/native/Cargo.lock
index 55c648edb..43e37c8b4 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -1539,6 +1539,7 @@ dependencies = [
  "tikv-jemallocator",
  "tokio",
  "url",
+ "uuid",
  "zstd",
 ]
 
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index c3d7dac84..92af3e238 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -77,6 +77,7 @@ reqwest = { version = "0.12", default-features = false, 
features = ["rustls-tls-
 object_store_opendal = {version = "0.54.0", optional = true}
 hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
 opendal = { version ="0.54.1", optional = true, features = ["services-hdfs"] }
+uuid = "1.0"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 procfs = "0.18.0"
diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs
index 00208e316..0b5c45d24 100644
--- a/native/core/src/parquet/parquet_support.rs
+++ b/native/core/src/parquet/parquet_support.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::execution::operators::ExecutionError;
-use arrow::array::{ListArray, MapArray};
+use arrow::array::{FixedSizeBinaryArray, ListArray, MapArray, StringArray};
 use arrow::buffer::NullBuffer;
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{FieldRef, Fields};
@@ -200,6 +200,28 @@ fn parquet_convert_array(
         (Map(_, ordered_from), Map(_, ordered_to)) if ordered_from == ordered_to =>
             parquet_convert_map_to_map(array.as_map(), to_type, parquet_options, *ordered_to)
             ,
+        // Iceberg stores UUIDs as 16-byte fixed binary but Spark expects string representation.
+        // Arrow doesn't support casting FixedSizeBinary to Utf8, so we handle it manually.
+        (FixedSizeBinary(16), Utf8) => {
+            let binary_array = array
+                .as_any()
+                .downcast_ref::<FixedSizeBinaryArray>()
+                .expect("Expected a FixedSizeBinaryArray");
+
+            let string_array: StringArray = binary_array
+                .iter()
+                .map(|opt_bytes| {
+                    opt_bytes.map(|bytes| {
+                        let uuid = uuid::Uuid::from_bytes(
+                            bytes.try_into().expect("Expected 16 bytes")
+                        );
+                        uuid.to_string()
+                    })
+                })
+                .collect();
+
+            Ok(Arc::new(string_array))
+        }
         // If Arrow cast supports the cast, delegate the cast to Arrow
         _ if can_cast_types(from_type, to_type) => {
             Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to