alamb opened a new issue, #6017:
URL: https://github.com/apache/arrow-rs/issues/6017

   **Describe the bug**
   The [`is_null`](https://docs.rs/arrow/latest/arrow/compute/fn.is_null.html) and [`is_not_null`](https://docs.rs/arrow/latest/arrow/compute/fn.is_not_null.html) kernels return incorrect results for `UnionArray`s.
   
   I believe the core problem is that `UnionArray` does not implement 
[`Array::logical_nulls`](https://docs.rs/arrow/latest/arrow/array/trait.Array.html#method.logical_nulls):
   
   
https://github.com/apache/arrow-rs/blob/b9562b9550b8ff4aa7be9859e56e467b1a3b3de6/arrow-array/src/array/union_array.rs#L445-L525
   
   
   Instead, it falls back to the default implementation (which calls 
`Array::nulls`): 
https://github.com/apache/arrow-rs/blob/b9562b9550b8ff4aa7be9859e56e467b1a3b3de6/arrow-array/src/array/mod.rs#L212-L214
   
   
   **To Reproduce**
   Run these tests in `arrow-arith/src/boolean.rs` (the full diff is included further down):
   
   ```rust
   
       /// Checks `is_not_null` on a 3-element `NullArray`: every slot must be
       /// reported as `false`, and the resulting `BooleanArray` must not carry
       /// a null buffer of its own.
       #[test]
       fn test_null_array_is_not_null() {
           let a = NullArray::new(3);
   
           let res = is_not_null(&a).unwrap();
   
           // All three slots of a `NullArray` are null, so none is "not null".
           let expected = BooleanArray::from(vec![false, false, false]);
   
           assert_eq!(expected, res);
           // The answer itself is never null: `nulls()` must be `None`.
           assert!(res.nulls().is_none());
       }
   
       /// Reproducer: `is_null` on a dense `UnionArray` (one built with an
       /// offsets buffer) must flag exactly the slots whose selected child
       /// value is null.
       #[test]
       fn test_dense_union_is_null() {
           // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
           // Each child holds one valid value followed by one null.
           let int_array = Int32Array::from(vec![Some(1), None]);
           let float_array = Float64Array::from(vec![Some(3.2), None]);
           let str_array = StringArray::from(vec![Some("a"), None]);
           // type_ids[i] names the field slot i belongs to
           // (0 = A, 1 = B, 2 = C, matching `union_fields`).
           let type_ids = [0, 0, 1, 1, 2, 
2].into_iter().collect::<ScalarBuffer<i8>>();
           // Dense layout: offsets[i] is the position of slot i inside its
           // child, so offset 0 picks the valid value and offset 1 the null.
           let offsets = [0, 1, 0, 1, 0, 1]
               .into_iter()
               .collect::<ScalarBuffer<i32>>();
   
           let children = vec![
               Arc::new(int_array) as Arc<dyn Array>,
               Arc::new(float_array),
               Arc::new(str_array),
           ];
   
           let array =
               UnionArray::try_new(union_fields(), type_ids, Some(offsets), 
children)
                   .unwrap();
   
           let result = is_null(&array).unwrap();
   
           // Slots 1, 3 and 5 resolve to a `None` child value and must be
           // reported as null.
           let expected = &BooleanArray::from(vec![false, true, false, true, 
false, true]);
           assert_eq!(expected, &result);
       }
   
       /// Reproducer: `is_null` on a sparse `UnionArray` (built with `None`
       /// for the offsets buffer) must flag exactly the slots whose selected
       /// child value is null.
       #[test]
       fn test_sparse_union_is_null() {
           // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
           // Sparse layout: every child has the same length as `type_ids` (6).
           // Per the Arrow union layout, slot i reads position i of the child
           // selected by type_ids[i]; the other children's entries at that
           // position are padding (here `None`).
           let int_array = Int32Array::from(vec![Some(1), None, None, None, 
None, None]);
           let float_array =
               Float64Array::from(vec![None, None, Some(3.2), None, None, 
None]);
           let str_array = StringArray::from(vec![None, None, None, None, 
Some("a"), None]);
           // type_ids[i] names the field slot i belongs to
           // (0 = A, 1 = B, 2 = C, matching `union_fields`).
           let type_ids = [0, 0, 1, 1, 2, 
2].into_iter().collect::<ScalarBuffer<i8>>();
   
           let children = vec![
               Arc::new(int_array) as Arc<dyn Array>,
               Arc::new(float_array),
               Arc::new(str_array),
           ];
   
           // `None` offsets: this is what makes the union sparse.
           let array =
               UnionArray::try_new(union_fields(), type_ids, None, 
children).unwrap();
   
           let result = is_null(&array).unwrap();
   
           // Slots 1, 3 and 5 resolve to a `None` child value and must be
           // reported as null.
           let expected = &BooleanArray::from(vec![false, true, false, true, 
false, true]);
           assert_eq!(expected, &result);
       }
   
       /// Builds the `UnionFields` shared by the two union tests: type ids
       /// 0, 1 and 2 map to fields `A` (`Int32`), `B` (`Float64`) and
       /// `C` (`Utf8`). Each field is created with `true` as the third
       /// `Field::new` argument, i.e. it is declared nullable.
       fn union_fields() -> UnionFields {
           [
               (0, Arc::new(Field::new("A", DataType::Int32, true))),
               (1, Arc::new(Field::new("B", DataType::Float64, true))),
               (2, Arc::new(Field::new("C", DataType::Utf8, true))),
           ]
               .into_iter()
               .collect()
       }
   ```
   
   <details><summary>Full diff</summary>
   <p>
   
   ```diff
   diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
   index ea8e12abbe2..0dd74a2d0b6 100644
   --- a/arrow-arith/src/boolean.rs
   +++ b/arrow-arith/src/boolean.rs
   @@ -354,6 +354,8 @@ pub fn is_not_null(input: &dyn Array) -> 
Result<BooleanArray, ArrowError> {
    mod tests {
        use super::*;
        use std::sync::Arc;
   +    use arrow_buffer::ScalarBuffer;
   +    use arrow_schema::{DataType, Field, UnionFields};
   
        #[test]
        fn test_bool_array_and() {
   @@ -911,4 +913,65 @@ mod tests {
            assert_eq!(expected, res);
            assert!(res.nulls().is_none());
        }
   +
   +    #[test]
   +    fn test_dense_union_is_null() {
   +        // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
   +        let int_array = Int32Array::from(vec![Some(1), None]);
   +        let float_array = Float64Array::from(vec![Some(3.2), None]);
   +        let str_array = StringArray::from(vec![Some("a"), None]);
   +        let type_ids = [0, 0, 1, 1, 2, 
2].into_iter().collect::<ScalarBuffer<i8>>();
   +        let offsets = [0, 1, 0, 1, 0, 1]
   +            .into_iter()
   +            .collect::<ScalarBuffer<i32>>();
   +
   +        let children = vec![
   +            Arc::new(int_array) as Arc<dyn Array>,
   +            Arc::new(float_array),
   +            Arc::new(str_array),
   +        ];
   +
   +        let array =
   +            UnionArray::try_new(union_fields(), type_ids, Some(offsets), 
children)
   +                .unwrap();
   +
   +        let result = is_null(&array).unwrap();
   +
   +        let expected = &BooleanArray::from(vec![false, true, false, true, 
false, true]);
   +        assert_eq!(expected, &result);
   +    }
   +
   +    #[test]
   +    fn test_sparse_union_is_null() {
   +        // union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
   +        let int_array = Int32Array::from(vec![Some(1), None, None, None, 
None, None]);
   +        let float_array =
   +            Float64Array::from(vec![None, None, Some(3.2), None, None, 
None]);
   +        let str_array = StringArray::from(vec![None, None, None, None, 
Some("a"), None]);
   +        let type_ids = [0, 0, 1, 1, 2, 
2].into_iter().collect::<ScalarBuffer<i8>>();
   +
   +        let children = vec![
   +            Arc::new(int_array) as Arc<dyn Array>,
   +            Arc::new(float_array),
   +            Arc::new(str_array),
   +        ];
   +
   +        let array =
   +            UnionArray::try_new(union_fields(), type_ids, None, 
children).unwrap();
   +
   +        let result = is_null(&array).unwrap();
   +
   +        let expected = &BooleanArray::from(vec![false, true, false, true, 
false, true]);
   +        assert_eq!(expected, &result);
   +    }
   +
   +    fn union_fields() -> UnionFields {
   +        [
   +            (0, Arc::new(Field::new("A", DataType::Int32, true))),
   +            (1, Arc::new(Field::new("B", DataType::Float64, true))),
   +            (2, Arc::new(Field::new("C", DataType::Utf8, true))),
   +        ]
   +            .into_iter()
   +            .collect()
   +    }
    }
   ```
   
   </p>
   </details> 
   
   **Expected behavior**
   The tests should pass
   
   Instead, the union tests currently fail because, for a `UnionArray`, 
`is_null` always returns false and `is_not_null` always returns true:
   
   ```
   assertion `left == right` failed
     left: BooleanArray
   [
     false,
     true,
     false,
     true,
     false,
     true,
   ]
    right: BooleanArray
   [
     false,
     false,
     false,
     false,
     false,
     false,
   ]
   ```
   
   **Additional context**
   This was found by @samuelcolvin  on 
https://github.com/apache/datafusion/issues/11162
   
   The reproducers are from his PR on 
https://github.com/apache/datafusion/pull/11321


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to