This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e7588bda4b Fix Dictionary logical nulls (#6740)
e7588bda4b is described below

commit e7588bda4b4048a5fb21cdb41efba93e18eca16d
Author: Piotr Findeisen <[email protected]>
AuthorDate: Tue Nov 19 09:41:24 2024 +0100

    Fix Dictionary logical nulls (#6740)
    
    For `DictionaryArray`, methods `logical_nulls` and `logical_null_count`
    would return incorrect result if the underlying values had different
    physical and logical nullability.
---
 arrow-array/src/array/dictionary_array.rs | 52 +++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index 1187e16769..920be65fcf 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -729,7 +729,7 @@ impl<T: ArrowDictionaryKeyType> Array for 
DictionaryArray<T> {
     }
 
     fn logical_nulls(&self) -> Option<NullBuffer> {
-        match self.values.nulls() {
+        match self.values.logical_nulls() {
             None => self.nulls().cloned(),
             Some(value_nulls) => {
                 let mut builder = BooleanBufferBuilder::new(self.len());
@@ -1020,7 +1020,7 @@ impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for 
DictionaryArray<K> {
 mod tests {
     use super::*;
     use crate::cast::as_dictionary_array;
-    use crate::{Int16Array, Int32Array, Int8Array};
+    use crate::{Int16Array, Int32Array, Int8Array, RunArray};
     use arrow_buffer::{Buffer, ToByteSlice};
 
     #[test]
@@ -1445,6 +1445,54 @@ mod tests {
         assert_eq!(values, &[Some(50), None, None, Some(2)])
     }
 
+    #[test]
+    fn test_logical_nulls() -> Result<(), ArrowError> {
+        let values = Arc::new(RunArray::try_new(
+            &Int32Array::from(vec![1, 3, 7]),
+            &Int32Array::from(vec![Some(1), None, Some(3)]),
+        )?) as ArrayRef;
+
+        // For this test to be meaningful, the values array need to have 
different nulls and logical nulls
+        assert_eq!(values.null_count(), 0);
+        assert_eq!(values.logical_null_count(), 2);
+
+        // Construct a trivial dictionary with 1-1 mapping to underlying array
+        let dictionary = DictionaryArray::<Int8Type>::try_new(
+            Int8Array::from((0..values.len()).map(|i| i as 
i8).collect::<Vec<_>>()),
+            Arc::clone(&values),
+        )?;
+
+        // No keys are null
+        assert_eq!(dictionary.null_count(), 0);
+        // Dictionary array values are logically nullable
+        assert_eq!(dictionary.logical_null_count(), 
values.logical_null_count());
+        assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
+        assert!(dictionary.is_nullable());
+
+        // Construct a trivial dictionary with 1-1 mapping to underlying array 
except that key 0 is nulled out
+        let dictionary = DictionaryArray::<Int8Type>::try_new(
+            Int8Array::from(
+                (0..values.len())
+                    .map(|i| i as i8)
+                    .map(|i| if i == 0 { None } else { Some(i) })
+                    .collect::<Vec<_>>(),
+            ),
+            Arc::clone(&values),
+        )?;
+
+        // One key is null
+        assert_eq!(dictionary.null_count(), 1);
+
+        // Dictionary array values are logically nullable
+        assert_eq!(
+            dictionary.logical_null_count(),
+            values.logical_null_count() + 1
+        );
+        assert!(dictionary.is_nullable());
+
+        Ok(())
+    }
+
     #[test]
     fn test_normalized_keys() {
         let values = vec![132, 0, 1].into();

Reply via email to