Copilot commented on code in PR #22285:
URL: https://github.com/apache/datafusion/pull/22285#discussion_r3253700979


##########
datafusion/functions-nested/src/arrays_zip.rs:
##########
@@ -327,3 +331,181 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
 
     Ok(Arc::new(result))
 }
+
+fn try_perfect_list_zip(args: &[ArrayRef]) -> Result<Option<ArrayRef>> {
+    let mut list_arrays = Vec::with_capacity(args.len());
+    let mut struct_fields = Vec::with_capacity(args.len());
+
+    for (i, arg) in args.iter().enumerate() {
+        let arr = match arg.data_type() {
+            List(field) => {
+                struct_fields.push(Field::new(
+                    format!("{}", i + 1),
+                    field.data_type().clone(),
+                    true,
+                ));
+                as_list_array(arg)?
+            }
+            _ => return Ok(None),
+        };
+
+        list_arrays.push(arr);
+    }
+
+    let first = list_arrays[0];
+    let num_rows = first.len();
+    let offsets = first.offsets().clone();
+    let values_len = first.values().len();
+
+    for arr in &list_arrays {
+        if arr.len() != num_rows || arr.values().len() != values_len {
+            return Ok(None);
+        }
+    }
+
+    let nulls = if list_arrays.iter().any(|arr| arr.null_count() != 0) {
+        let mut null_builder = NullBufferBuilder::new(num_rows);
+        for row_idx in 0..num_rows {
+            let mut all_null = true;
+
+            for arr in &list_arrays {
+                if arr.is_null(row_idx) {
+                    if arr.offsets()[row_idx + 1] != arr.offsets()[row_idx] {
+                        return Ok(None);
+                    }
+                } else {
+                    all_null = false;
+                }
+            }
+
+            if all_null {
+                null_builder.append_null();
+            } else {
+                null_builder.append_non_null();
+            }
+        }
+
+        null_builder.finish()
+    } else {
+        None
+    };
+
+    for arr in &list_arrays {
+        if arr.offsets() != &offsets {
+            return Ok(None);
+        }
+    }

Review Comment:
   The offsets equality check happens after the per-row null scan. Since offset 
mismatch is a common/cheap rejection condition, move the `arr.offsets() != 
&offsets` loop up (before building/scanning `nulls`) to avoid the O(num_rows * 
num_arrays) pass when inputs don’t qualify for the fast-path.



##########
datafusion/functions-nested/src/arrays_zip.rs:
##########
@@ -327,3 +331,181 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
 
     Ok(Arc::new(result))
 }
+
+fn try_perfect_list_zip(args: &[ArrayRef]) -> Result<Option<ArrayRef>> {
+    let mut list_arrays = Vec::with_capacity(args.len());
+    let mut struct_fields = Vec::with_capacity(args.len());
+
+    for (i, arg) in args.iter().enumerate() {
+        let arr = match arg.data_type() {
+            List(field) => {
+                struct_fields.push(Field::new(
+                    format!("{}", i + 1),
+                    field.data_type().clone(),
+                    true,
+                ));
+                as_list_array(arg)?
+            }
+            _ => return Ok(None),
+        };

Review Comment:
   This fast-path has several non-obvious eligibility constraints (all args 
must be `List`, identical offsets across args, identical `values().len()`, and 
null rows must be zero-length). Add a short doc comment or inline comment 
summarizing these constraints and the rationale (reuse offsets/values to avoid 
rebuilding) to make future maintenance safer.



##########
datafusion/functions-nested/src/arrays_zip.rs:
##########
@@ -327,3 +331,181 @@ fn arrays_zip_inner(args: &[ArrayRef]) -> 
Result<ArrayRef> {
 
     Ok(Arc::new(result))
 }
+
+fn try_perfect_list_zip(args: &[ArrayRef]) -> Result<Option<ArrayRef>> {
+    let mut list_arrays = Vec::with_capacity(args.len());
+    let mut struct_fields = Vec::with_capacity(args.len());
+
+    for (i, arg) in args.iter().enumerate() {
+        let arr = match arg.data_type() {
+            List(field) => {
+                struct_fields.push(Field::new(
+                    format!("{}", i + 1),

Review Comment:
   Using `format!(\"{}\", i + 1)` for numeric-to-string conversion is heavier 
than needed. Prefer `(i + 1).to_string()` (or equivalent) here to reduce 
formatting overhead in this hot-path setup code.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to