Jefffrey commented on code in PR #9275:
URL: https://github.com/apache/arrow-rs/pull/9275#discussion_r2761850332
##########
arrow-row/src/variable.rs:
##########
@@ -143,6 +146,18 @@ pub fn encode_empty(out: &mut [u8], opts: SortOptions) ->
usize {
1
}
+pub fn encode_null_value(out: &mut [u8], opts: SortOptions) -> usize {
Review Comment:
```suggestion
/// Ensure `NullArray`s don't get encoded as empty lists which can lose
their length
pub fn encode_null_value(out: &mut [u8], opts: SortOptions) -> usize {
```
Just so we have an easy record for why we're doing this
##########
arrow-row/src/lib.rs:
##########
@@ -5391,4 +4478,290 @@ mod tests {
let mut lengths_iter = rows.lengths();
assert_eq!(lengths_iter.next(), None);
}
+
+ #[test]
Review Comment:
Thanks for the comprehensive tests, though I feel there is a bit too much;
some of them can be consolidated (e.g. we don't really need a separate test for
`[[], [], []]`, `[[NULL], [], [NULL, NULL]]`, `[[NULL], NULL, [NULL, NULL]]`;
we can combine them together)
Also some tests seem to be incorrectly named, such as
`test_nested_null_list` which doesn't actually test a nested list.
##########
arrow-row/src/lib.rs:
##########
@@ -5391,4 +4478,290 @@ mod tests {
let mut lengths_iter = rows.lengths();
assert_eq!(lengths_iter.next(), None);
}
+
+ #[test]
+ fn test_nested_null_list() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], [], [NULL, NULL]]
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test case from original issue #9227: [[NULL]] - double nested List with
Null
+ #[test]
+ fn test_double_nested_null_list() {
+ let null_array = Arc::new(NullArray::new(1));
+ // [NULL]
+ let nested_field = Arc::new(Field::new_list_field(DataType::Null,
true));
+ let nested_list = Arc::new(ListArray::new(
+ nested_field.clone(),
+ OffsetBuffer::from_lengths(vec![1]),
+ null_array,
+ None,
+ ));
+ // [[NULL]]
+ let list = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::List(nested_field), true).into(),
+ OffsetBuffer::from_lengths(vec![1]),
+ nested_list,
+ None,
+ )) as ArrayRef;
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test LargeList<Null>
+ #[test]
+ fn test_large_list_null() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], [], [NULL, NULL]] as LargeList
+ let list: ArrayRef = Arc::new(LargeListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test FixedSizeList<Null>
+ #[test]
+ fn test_fixed_size_list_null() {
+ let null_array = Arc::new(NullArray::new(6));
+ // [[NULL, NULL], [NULL, NULL], [NULL, NULL]] as FixedSizeList
+ let list: ArrayRef = Arc::new(FixedSizeListArray::new(
+ Arc::new(Field::new_list_field(DataType::Null, true)),
+ 2,
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test triple nested null list: [[[NULL]]]
+ #[test]
+ fn test_triple_nested_null_list() {
Review Comment:
Triple nesting is probably unnecessary; testing double nesting should be
sufficient
##########
arrow-row/src/lib.rs:
##########
@@ -5391,4 +4478,290 @@ mod tests {
let mut lengths_iter = rows.lengths();
assert_eq!(lengths_iter.next(), None);
}
+
+ #[test]
+ fn test_nested_null_list() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], [], [NULL, NULL]]
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test case from original issue #9227: [[NULL]] - double nested List with
Null
+ #[test]
+ fn test_double_nested_null_list() {
+ let null_array = Arc::new(NullArray::new(1));
+ // [NULL]
+ let nested_field = Arc::new(Field::new_list_field(DataType::Null,
true));
+ let nested_list = Arc::new(ListArray::new(
+ nested_field.clone(),
+ OffsetBuffer::from_lengths(vec![1]),
+ null_array,
+ None,
+ ));
+ // [[NULL]]
+ let list = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::List(nested_field), true).into(),
+ OffsetBuffer::from_lengths(vec![1]),
+ nested_list,
+ None,
+ )) as ArrayRef;
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test LargeList<Null>
+ #[test]
+ fn test_large_list_null() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], [], [NULL, NULL]] as LargeList
+ let list: ArrayRef = Arc::new(LargeListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test FixedSizeList<Null>
+ #[test]
+ fn test_fixed_size_list_null() {
+ let null_array = Arc::new(NullArray::new(6));
+ // [[NULL, NULL], [NULL, NULL], [NULL, NULL]] as FixedSizeList
+ let list: ArrayRef = Arc::new(FixedSizeListArray::new(
+ Arc::new(Field::new_list_field(DataType::Null, true)),
+ 2,
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test triple nested null list: [[[NULL]]]
+ #[test]
+ fn test_triple_nested_null_list() {
+ let null_array = Arc::new(NullArray::new(1));
+ // [NULL]
+ let inner_field = Arc::new(Field::new_list_field(DataType::Null,
true));
+ let inner_list = Arc::new(ListArray::new(
+ inner_field.clone(),
+ OffsetBuffer::from_lengths(vec![1]),
+ null_array,
+ None,
+ ));
+ // [[NULL]]
+ let middle_field =
Arc::new(Field::new_list_field(DataType::List(inner_field), true));
+ let middle_list = Arc::new(ListArray::new(
+ middle_field.clone(),
+ OffsetBuffer::from_lengths(vec![1]),
+ inner_list,
+ None,
+ ));
+ // [[[NULL]]]
+ let list = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::List(middle_field), true).into(),
+ OffsetBuffer::from_lengths(vec![1]),
+ middle_list,
+ None,
+ )) as ArrayRef;
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test List<Null> with null values
+ #[test]
+ fn test_list_null_with_nulls() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], null, [NULL, NULL]] - middle element is null
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ Some(vec![true, false, true].into()), // validity bitmap
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test List<Null> with descending order
+ #[test]
+ fn test_list_null_descending() {
+ let null_array = Arc::new(NullArray::new(3));
+ // [[NULL], [], [NULL, NULL]]
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![1, 0, 2]),
+ null_array,
+ None,
+ ));
+
+ let options = SortOptions::default().with_descending(true);
+ let field = SortField::new_with_options(list.data_type().clone(),
options);
+ let converter = RowConverter::new(vec![field]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test empty List<Null>
+ #[test]
+ fn test_empty_list_null() {
+ let null_array = Arc::new(NullArray::new(0));
+ // []
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test List<Null> with all empty sublists
+ #[test]
+ fn test_list_null_all_empty_sublists() {
+ let null_array = Arc::new(NullArray::new(0));
+ // [[], [], []]
+ let list: ArrayRef = Arc::new(ListArray::new(
+ Field::new_list_field(DataType::Null, true).into(),
+ OffsetBuffer::from_lengths(vec![0, 0, 0]),
+ null_array,
+ None,
+ ));
+
+ let converter =
RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
+ let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&list, &back[0]);
+ }
+
+ // Test Struct with Null field
+ #[test]
+ fn test_struct_with_null_field() {
+ // Struct { a: Null, b: Int32 }
+ let null_array = Arc::new(NullArray::new(3));
+ let int_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
+
+ let struct_array: ArrayRef = Arc::new(StructArray::new(
+ vec![
+ Arc::new(Field::new("a", DataType::Null, true)),
+ Arc::new(Field::new("b", DataType::Int32, true)),
+ ]
+ .into(),
+ vec![null_array, int_array],
+ Some(vec![true, true, false].into()), // validity bitmap
+ ));
+
+ let converter =
+
RowConverter::new(vec![SortField::new(struct_array.data_type().clone())]).unwrap();
+ let rows = converter
+ .convert_columns(&[Arc::clone(&struct_array)])
+ .unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&struct_array, &back[0]);
+ }
+
+ // Test nested Struct with Null
+ #[test]
+ fn test_nested_struct_with_null() {
+ // Inner struct: { x: Null }
+ let inner_null = Arc::new(NullArray::new(2));
+ let inner_struct = Arc::new(StructArray::new(
+ vec![Arc::new(Field::new("x", DataType::Null, true))].into(),
+ vec![inner_null],
+ None,
+ ));
+
+ // Outer struct: { inner: Struct { x: Null }, y: Int32 }
+ let y_array = Arc::new(Int32Array::from(vec![10, 20]));
+ let outer_struct: ArrayRef = Arc::new(StructArray::new(
+ vec![
+ Arc::new(Field::new("inner", inner_struct.data_type().clone(),
true)),
+ Arc::new(Field::new("y", DataType::Int32, true)),
+ ]
+ .into(),
+ vec![inner_struct, y_array],
+ None,
+ ));
+
+ let converter =
+
RowConverter::new(vec![SortField::new(outer_struct.data_type().clone())]).unwrap();
+ let rows = converter
+ .convert_columns(&[Arc::clone(&outer_struct)])
+ .unwrap();
+ let back = converter.convert_rows(&rows).unwrap();
+
+ assert_eq!(&outer_struct, &back[0]);
+ }
+
+ // Test Map<Null> - verify it's not supported (as per current
implementation)
+ // This test verifies the behavior mentioned in alamb's comment about Map
Review Comment:
```suggestion
// https://github.com/apache/arrow-rs/issues/7879
```
We have an issue for map support already, can just link it here
- https://github.com/apache/arrow-rs/issues/7879
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]