This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d80fdaa7d8 fix: Row encoding an array of type `Struct([])` (#7419)
d80fdaa7d8 is described below

commit d80fdaa7d8b337110dfee7384cf908c634a8cccf
Author: Alex Huang <[email protected]>
AuthorDate: Sat Jan 17 06:47:41 2026 +0200

    fix: Row encoding an array of type `Struct([])` (#7419)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax. For example
    `Closes #123` indicates that this PR will close issue #123.
    -->
    
    - Closes #7168
    
    # Rationale for this change
    
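    Check whether the child rows are empty before computing row lengths and
    encoding a struct column, so that arrays of type `Struct([])` (a struct
    with no child fields) are handled.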
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    # What changes are included in this PR?
    
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    # Are there any user-facing changes?
    
    No
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    -->
    
    <!---
    If there are any breaking changes to public APIs, please call them out.
    -->
    
    ---------
    
    Co-authored-by: Jefffrey <[email protected]>
---
 arrow-row/src/lib.rs | 95 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 18 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 1da0439ee9..fdad413e0e 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1591,10 +1591,19 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
             }
             Encoder::Struct(rows, null) => {
                 let array = as_struct_array(array);
-                tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
-                    true => 1 + rows.row_len(idx),
-                    false => 1 + null.data.len(),
-                }));
+                if rows.num_rows() > 0 {
+                    // Only calculate row length if there are rows
+                    tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
+                        true => 1 + rows.row_len(idx),
+                        false => 1 + null.data.len(),
+                    }));
+                } else {
+                    // Edge case for Struct([]) arrays (no child fields)
+                    tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
+                        true => 1,
+                        false => 1 + null.data.len(),
+                    }));
+                }
             }
             Encoder::List(rows) => match array.data_type() {
                 DataType::List(_) => {
@@ -1774,22 +1783,50 @@ fn encode_column(
             }
         }
         Encoder::Struct(rows, null) => {
+            fn struct_encode_helper<const NO_CHILD_FIELDS: bool>(
+                array: &StructArray,
+                offsets: &mut [usize],
+                null_sentinel: u8,
+                rows: &Rows,
+                null: &Row<'_>,
+                data: &mut [u8],
+            ) {
+                let empty_row = Row {
+                    data: &[],
+                    config: &rows.config,
+                };
+
+                offsets
+                    .iter_mut()
+                    .skip(1)
+                    .enumerate()
+                    .for_each(|(idx, offset)| {
+                        let (row, sentinel) = match array.is_valid(idx) {
+                            true => (
+                                if NO_CHILD_FIELDS {
+                                    empty_row
+                                } else {
+                                    rows.row(idx)
+                                },
+                                0x01,
+                            ),
+                            false => (*null, null_sentinel),
+                        };
+                        let end_offset = *offset + 1 + row.as_ref().len();
+                        data[*offset] = sentinel;
+                        data[*offset + 1..end_offset].copy_from_slice(row.as_ref());
+                        *offset = end_offset;
+                    })
+            }
+
             let array = as_struct_array(column);
             let null_sentinel = null_sentinel(opts);
-            offsets
-                .iter_mut()
-                .skip(1)
-                .enumerate()
-                .for_each(|(idx, offset)| {
-                    let (row, sentinel) = match array.is_valid(idx) {
-                        true => (rows.row(idx), 0x01),
-                        false => (*null, null_sentinel),
-                    };
-                    let end_offset = *offset + 1 + row.as_ref().len();
-                    data[*offset] = sentinel;
-                    data[*offset + 1..end_offset].copy_from_slice(row.as_ref());
-                    *offset = end_offset;
-                })
+            if rows.num_rows() == 0 {
+                // Edge case for Struct([]) arrays (no child fields)
+                struct_encode_helper::<true>(array, offsets, null_sentinel, rows, null, data);
+            } else {
+                struct_encode_helper::<false>(array, offsets, null_sentinel, rows, null, data);
+            }
         }
         Encoder::List(rows) => match column.data_type() {
             DataType::List(_) => list::encode(data, offsets, rows, opts, as_list_array(column)),
@@ -4368,6 +4405,28 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_struct_no_child_fields() {
+        fn run_test(array: ArrayRef) {
+            let sort_fields = vec![SortField::new(array.data_type().clone())];
+            let converter = RowConverter::new(sort_fields).unwrap();
+            let r = converter.convert_columns(&[Arc::clone(&array)]).unwrap();
+
+            let back = converter.convert_rows(&r).unwrap();
+            assert_eq!(back.len(), 1);
+            assert_eq!(&back[0], &array);
+        }
+
+        let s = Arc::new(StructArray::new_empty_fields(5, None)) as ArrayRef;
+        run_test(s);
+
+        let s = Arc::new(StructArray::new_empty_fields(
+            5,
+            Some(vec![true, false, true, false, false].into()),
+        )) as ArrayRef;
+        run_test(s);
+    }
+
     #[test]
     fn reserve_should_increase_capacity_to_the_requested_size() {
         let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();

Reply via email to