This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c50fa8c44 Improve default implementation of Array::is_nullable (#6721)
7c50fa8c44 is described below

commit 7c50fa8c441fad05e15c85af27db050f3810bef2
Author: Piotr Findeisen <[email protected]>
AuthorDate: Wed Nov 20 23:42:33 2024 +0100

    Improve default implementation of Array::is_nullable (#6721)
    
    * Improve default implementation of Array::is_nullable
    
    Since is_nullable returns `false` if the array is guaranteed to not
    contain any logical nulls, the correct default implementation could
    leverage `self.logical_null_count` more appropriately than
    `self.null_count`.
    
    To reduce chance of negative surprises in downstream projects, we could
    introduce the new behavior under `is_logically_nullable` name and
    deprecate `is_nullable` without changing it.
    
    * Implement efficient DictionaryArray::logical_null_count
---
 arrow-array/src/array/dictionary_array.rs | 20 ++++++++++++++++++++
 arrow-array/src/array/mod.rs              |  3 +--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index 920be65fcf..55ecd56f98 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -749,6 +749,26 @@ impl<T: ArrowDictionaryKeyType> Array for 
DictionaryArray<T> {
         }
     }
 
+    fn logical_null_count(&self) -> usize {
+        match (self.keys.nulls(), self.values.logical_nulls()) {
+            (None, None) => 0,
+            (Some(key_nulls), None) => key_nulls.null_count(),
+            (None, Some(value_nulls)) => self
+                .keys
+                .values()
+                .iter()
+                .filter(|k| value_nulls.is_null(k.as_usize()))
+                .count(),
+            (Some(key_nulls), Some(value_nulls)) => self
+                .keys
+                .values()
+                .iter()
+                .enumerate()
+                .filter(|(idx, k)| key_nulls.is_null(*idx) || 
value_nulls.is_null(k.as_usize()))
+                .count(),
+        }
+    }
+
     fn is_nullable(&self) -> bool {
         !self.is_empty() && (self.nulls().is_some() || 
self.values.is_nullable())
     }
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 04d9883f5b..87577166ea 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -317,8 +317,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
     /// even if the nulls present in [`DictionaryArray::values`] are not 
referenced by any key,
     /// and therefore would not appear in [`Array::logical_nulls`].
     fn is_nullable(&self) -> bool {
-        // TODO this is not necessarily perfect default implementation, since 
null_count() and logical_null_count() are not always equivalent
-        self.null_count() != 0
+        self.logical_null_count() != 0
     }
 
     /// Returns the total number of bytes of memory pointed to by this array.

Reply via email to