This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e7588bda4b Fix Dictionary logical nulls (#6740)
e7588bda4b is described below
commit e7588bda4b4048a5fb21cdb41efba93e18eca16d
Author: Piotr Findeisen <[email protected]>
AuthorDate: Tue Nov 19 09:41:24 2024 +0100
Fix Dictionary logical nulls (#6740)
For `DictionaryArray`, methods `logical_nulls` and `logical_null_count`
would return incorrect result if the underlying values had different
physical and logical nullability.
---
arrow-array/src/array/dictionary_array.rs | 52 +++++++++++++++++++++++++++++--
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 1187e16769..920be65fcf 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -729,7 +729,7 @@ impl<T: ArrowDictionaryKeyType> Array for
DictionaryArray<T> {
}
fn logical_nulls(&self) -> Option<NullBuffer> {
- match self.values.nulls() {
+ match self.values.logical_nulls() {
None => self.nulls().cloned(),
Some(value_nulls) => {
let mut builder = BooleanBufferBuilder::new(self.len());
@@ -1020,7 +1020,7 @@ impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for
DictionaryArray<K> {
mod tests {
use super::*;
use crate::cast::as_dictionary_array;
- use crate::{Int16Array, Int32Array, Int8Array};
+ use crate::{Int16Array, Int32Array, Int8Array, RunArray};
use arrow_buffer::{Buffer, ToByteSlice};
#[test]
@@ -1445,6 +1445,54 @@ mod tests {
assert_eq!(values, &[Some(50), None, None, Some(2)])
}
+ #[test]
+ fn test_logical_nulls() -> Result<(), ArrowError> {
+ let values = Arc::new(RunArray::try_new(
+ &Int32Array::from(vec![1, 3, 7]),
+ &Int32Array::from(vec![Some(1), None, Some(3)]),
+ )?) as ArrayRef;
+
+ // For this test to be meaningful, the values array need to have
different nulls and logical nulls
+ assert_eq!(values.null_count(), 0);
+ assert_eq!(values.logical_null_count(), 2);
+
+ // Construct a trivial dictionary with 1-1 mapping to underlying array
+ let dictionary = DictionaryArray::<Int8Type>::try_new(
+ Int8Array::from((0..values.len()).map(|i| i as
i8).collect::<Vec<_>>()),
+ Arc::clone(&values),
+ )?;
+
+ // No keys are null
+ assert_eq!(dictionary.null_count(), 0);
+ // Dictionary array values are logically nullable
+ assert_eq!(dictionary.logical_null_count(),
values.logical_null_count());
+ assert_eq!(dictionary.logical_nulls(), values.logical_nulls());
+ assert!(dictionary.is_nullable());
+
+ // Construct a trivial dictionary with 1-1 mapping to underlying array
except that key 0 is nulled out
+ let dictionary = DictionaryArray::<Int8Type>::try_new(
+ Int8Array::from(
+ (0..values.len())
+ .map(|i| i as i8)
+ .map(|i| if i == 0 { None } else { Some(i) })
+ .collect::<Vec<_>>(),
+ ),
+ Arc::clone(&values),
+ )?;
+
+ // One key is null
+ assert_eq!(dictionary.null_count(), 1);
+
+ // Dictionary array values are logically nullable
+ assert_eq!(
+ dictionary.logical_null_count(),
+ values.logical_null_count() + 1
+ );
+ assert!(dictionary.is_nullable());
+
+ Ok(())
+ }
+
#[test]
fn test_normalized_keys() {
let values = vec![132, 0, 1].into();