This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 8078e099d feat: cherry-pick UUID conversion logic from #2528. (#2648)
8078e099d is described below
commit 8078e099d41293b05d3d11de72c088526b42ee3a
Author: Matt Butrovich <[email protected]>
AuthorDate: Sat Oct 25 10:39:03 2025 -0400
feat: cherry-pick UUID conversion logic from #2528. (#2648)
---
native/Cargo.lock | 1 +
native/core/Cargo.toml | 1 +
native/core/src/parquet/parquet_support.rs | 24 +++++++++++++++++++++++-
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index 55c648edb..43e37c8b4 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -1539,6 +1539,7 @@ dependencies = [
"tikv-jemallocator",
"tokio",
"url",
+ "uuid",
"zstd",
]
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index c3d7dac84..92af3e238 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -77,6 +77,7 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-
object_store_opendal = {version = "0.54.0", optional = true}
hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
opendal = { version ="0.54.1", optional = true, features = ["services-hdfs"] }
+uuid = "1.0"
[target.'cfg(target_os = "linux")'.dependencies]
procfs = "0.18.0"
diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs
index 00208e316..0b5c45d24 100644
--- a/native/core/src/parquet/parquet_support.rs
+++ b/native/core/src/parquet/parquet_support.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::execution::operators::ExecutionError;
-use arrow::array::{ListArray, MapArray};
+use arrow::array::{FixedSizeBinaryArray, ListArray, MapArray, StringArray};
use arrow::buffer::NullBuffer;
use arrow::compute::can_cast_types;
use arrow::datatypes::{FieldRef, Fields};
@@ -200,6 +200,28 @@ fn parquet_convert_array(
        (Map(_, ordered_from), Map(_, ordered_to)) if ordered_from == ordered_to =>
            parquet_convert_map_to_map(array.as_map(), to_type, parquet_options, *ordered_to),
+        // Iceberg stores UUIDs as 16-byte fixed binary but Spark expects string representation.
+        // Arrow doesn't support casting FixedSizeBinary to Utf8, so we handle it manually.
+        (FixedSizeBinary(16), Utf8) => {
+            let binary_array = array
+                .as_any()
+                .downcast_ref::<FixedSizeBinaryArray>()
+                .expect("Expected a FixedSizeBinaryArray");
+
+            let string_array: StringArray = binary_array
+                .iter()
+                .map(|opt_bytes| {
+                    opt_bytes.map(|bytes| {
+                        let uuid = uuid::Uuid::from_bytes(
+                            bytes.try_into().expect("Expected 16 bytes")
+                        );
+                        uuid.to_string()
+                    })
+                })
+                .collect();
+
+            Ok(Arc::new(string_array))
+        }
        // If Arrow cast supports the cast, delegate the cast to Arrow
        _ if can_cast_types(from_type, to_type) => {
            Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?)
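For reference, below is a minimal standalone sketch (not part of the commit) of the
same FixedSizeBinary(16) -> Utf8 conversion applied to a toy array. It assumes only
the arrow and uuid crates and uses a made-up sample UUID; the per-value logic mirrors
the new match arm above.

use arrow::array::{Array, FixedSizeBinaryArray, StringArray};
use uuid::Uuid;

fn main() {
    // Build a 16-byte fixed-size binary column with one UUID and one null,
    // mimicking how Iceberg stores UUID values in Parquet.
    let id = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
    let binary_array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
        vec![Some(id.as_bytes().to_vec()), None].into_iter(),
        16,
    )
    .unwrap();

    // Convert each 16-byte value to its canonical UUID string, preserving nulls.
    let string_array: StringArray = binary_array
        .iter()
        .map(|opt_bytes| {
            opt_bytes.map(|bytes| {
                Uuid::from_bytes(bytes.try_into().expect("Expected 16 bytes")).to_string()
            })
        })
        .collect();

    assert_eq!(string_array.value(0), "67e55044-10b1-426f-9247-bb680e5fe0c8");
    assert!(string_array.is_null(1));
}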
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]