andygrove commented on code in PR #4190:
URL: https://github.com/apache/datafusion-comet/pull/4190#discussion_r3185561676


##########
native/core/src/parquet/parquet_support.rs:
##########
@@ -279,13 +286,18 @@ fn parquet_convert_struct_to_struct(
                 }
             }
 
-            // If target schema doesn't contain any of the existing fields
-            // mark such a column in array as NULL
-            let nulls = if field_overlap {
-                array.nulls().cloned()
-            } else {
-                Some(NullBuffer::new_null(array.len()))
-            };
+            // When the file's struct contains none of the requested fields, 
the
+            // returned validity buffer depends on Spark's
+            // `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` 
(SPARK-53535,
+            // Spark 4.1+). Legacy mode marks the whole column null; the new 
default
+            // preserves the file's parent-row nullness so non-null parents 
materialize
+            // as a struct of all-null fields.
+            let nulls =

Review Comment:
   Updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to