mbutrovich commented on code in PR #3298:
URL: https://github.com/apache/datafusion-comet/pull/3298#discussion_r2733495439
##########
spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala:
##########
@@ -367,57 +350,62 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
// Only serialize partition type if there are actual partition fields
if (!fields.isEmpty) {
try {
- // Manually build StructType JSON to match iceberg-rust
expectations.
- // Using Iceberg's SchemaParser.toJson() would include
schema-level
- // metadata (e.g., "schema-id") that iceberg-rust's StructType
- // deserializer rejects. We need pure StructType format:
- // {"type":"struct","fields":[...]}
-
- // Filter out fields with unknown types (dropped partition
fields).
- // Unknown type fields represent partition columns that have
been dropped
- // from the schema. Per the Iceberg spec, unknown type fields
are not
- // stored in data files and iceberg-rust doesn't support
deserializing
- // them. Since these columns are dropped, we don't need to
expose their
- // partition values when reading.
- val fieldsJson = fields.asScala.flatMap { field =>
- val fieldTypeStr = getFieldType(field)
-
- // Skip fields with unknown type (dropped partition columns)
- if (fieldTypeStr == IcebergReflection.TypeNames.UNKNOWN) {
- None
- } else {
- val fieldIdMethod = field.getClass.getMethod("fieldId")
- val fieldId = fieldIdMethod.invoke(field).asInstanceOf[Int]
-
- val nameMethod = field.getClass.getMethod("name")
- val fieldName = nameMethod.invoke(field).asInstanceOf[String]
-
- val isOptionalMethod = field.getClass.getMethod("isOptional")
- val isOptional =
- isOptionalMethod.invoke(field).asInstanceOf[Boolean]
- val required = !isOptional
-
- Some(
- ("id" -> fieldId) ~
- ("name" -> fieldName) ~
- ("required" -> required) ~
- ("type" -> fieldTypeStr))
- }
- }.toList
+ // Use spec identity for deduplication - same spec = same
partition type
+ // Only build JSON for new specs
+ val typeIdxOpt =
partitionTypeToPoolIndex.get(spec.asInstanceOf[AnyRef])
+ val typeIdx = typeIdxOpt.getOrElse {
+ // Manually build StructType JSON to match iceberg-rust
expectations.
+ // Using Iceberg's SchemaParser.toJson() would include
schema-level
+ // metadata (e.g., "schema-id") that iceberg-rust's StructType
+ // deserializer rejects. We need pure StructType format:
+ // {"type":"struct","fields":[...]}
+
+ // Filter out fields with unknown types (dropped partition
fields).
+ // Unknown type fields represent partition columns that have
been dropped
+ // from the schema. Per the Iceberg spec, unknown type fields
are not
+ // stored in data files and iceberg-rust doesn't support
deserializing
+ // them. Since these columns are dropped, we don't need to
expose their
+ // partition values when reading.
+ val fieldsJson = fields.asScala.flatMap { field =>
+ val fieldTypeStr = getFieldType(field)
+
+ // Skip fields with unknown type (dropped partition columns)
+ if (fieldTypeStr == IcebergReflection.TypeNames.UNKNOWN) {
+ None
+ } else {
+ val fieldIdMethod = field.getClass.getMethod("fieldId")
Review Comment:
It looks like some of these still aren't being cached?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]