puneetdixit200 commented on code in PR #22285:
URL: https://github.com/apache/datafusion/pull/22285#discussion_r3301924048


##########
datafusion/functions-nested/src/arrays_zip.rs:
##########
@@ -327,3 +332,226 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
 
     Ok(Arc::new(result))
 }
+
+fn arrays_zip_field_name(index: usize) -> String {
+    (index + 1).to_string()
+}
+
+fn arrays_zip_field_names(len: usize) -> Vec<String> {
+    (0..len).map(arrays_zip_field_name).collect()
+}
+
+/// Fast path for regular List inputs whose existing buffers already match the
+/// zipped output: all offsets and values lengths match, and null rows cover no
+/// values. This lets us reuse offsets and child values instead of rebuilding.
+fn try_perfect_list_zip(
+    args: &[ArrayRef],
+    field_names: &[String],
+) -> Result<Option<ArrayRef>> {
+    debug_assert_eq!(args.len(), field_names.len());
+
+    let mut list_arrays = Vec::with_capacity(args.len());
+    let mut struct_fields = Vec::with_capacity(args.len());
+
+    for (arg, field_name) in args.iter().zip(field_names) {
+        let arr = match arg.data_type() {
+            List(field) => {
+                struct_fields.push(Field::new(
+                    field_name.clone(),
+                    field.data_type().clone(),
+                    true,
+                ));
+                as_list_array(arg)?
+            }
+            _ => return Ok(None),
+        };
+
+        list_arrays.push(arr);
+    }
+
+    let first = list_arrays[0];
+    let num_rows = first.len();
+    let offsets = first.offsets().clone();
+    let values_len = first.values().len();
+
+    // Reusing the child arrays is only valid when every list uses the exact
+    // same row boundaries and exposes the same total number of child values.
+    for arr in &list_arrays {
+        if arr.len() != num_rows
+            || arr.values().len() != values_len
+            || arr.offsets() != &offsets
+        {
+            return Ok(None);
+        }
+    }
+
+    let nulls = if list_arrays.iter().any(|arr| arr.null_count() != 0) {

Review Comment:
   Kept the explicit null construction here because `arrays_zip` only marks an
output row null when every concrete input list row is null.
`NullBuffer::union_many` would instead mark a row null as soon as any input is
null for that row, which would change the result of mixed null/non-null cases
like `arrays_zip(NULL::int[], [1, 2, 3])`. b184e03c5 adds a regression test for
the mixed-null empty-row case and keeps the hidden-value fallback.



##########
datafusion/functions-nested/src/arrays_zip.rs:
##########
@@ -327,3 +332,226 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
 
     Ok(Arc::new(result))
 }
+
+fn arrays_zip_field_name(index: usize) -> String {
+    (index + 1).to_string()
+}
+
+fn arrays_zip_field_names(len: usize) -> Vec<String> {
+    (0..len).map(arrays_zip_field_name).collect()
+}
+
+/// Fast path for regular List inputs whose existing buffers already match the
+/// zipped output: all offsets and values lengths match, and null rows cover no
+/// values. This lets us reuse offsets and child values instead of rebuilding.
+fn try_perfect_list_zip(
+    args: &[ArrayRef],
+    field_names: &[String],
+) -> Result<Option<ArrayRef>> {
+    debug_assert_eq!(args.len(), field_names.len());
+
+    let mut list_arrays = Vec::with_capacity(args.len());
+    let mut struct_fields = Vec::with_capacity(args.len());
+
+    for (arg, field_name) in args.iter().zip(field_names) {
+        let arr = match arg.data_type() {
+            List(field) => {
+                struct_fields.push(Field::new(
+                    field_name.clone(),
+                    field.data_type().clone(),
+                    true,
+                ));
+                as_list_array(arg)?
+            }
+            _ => return Ok(None),
+        };
+
+        list_arrays.push(arr);
+    }
+
+    let first = list_arrays[0];
+    let num_rows = first.len();
+    let offsets = first.offsets().clone();
+    let values_len = first.values().len();
+
+    // Reusing the child arrays is only valid when every list uses the exact
+    // same row boundaries and exposes the same total number of child values.
+    for arr in &list_arrays {
+        if arr.len() != num_rows

Review Comment:
   Removed in b184e03c5.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to