This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6dea45377f Implement logical_null_count for more array types (#6704)
6dea45377f is described below
commit 6dea45377fba6def149db25ae1b22ee33244358b
Author: Piotr Findeisen <[email protected]>
AuthorDate: Sat Nov 9 08:42:40 2024 +0100
Implement logical_null_count for more array types (#6704)
Implement Array::logical_null_count() where it's easy to calculate the
answer without relying on the default implementation, which allocates.
---
arrow-array/src/array/byte_array.rs | 5 +++++
arrow-array/src/array/byte_view_array.rs | 5 +++++
arrow-array/src/array/dictionary_array.rs | 4 ++++
arrow-array/src/array/fixed_size_binary_array.rs | 5 +++++
arrow-array/src/array/fixed_size_list_array.rs | 5 +++++
arrow-array/src/array/list_array.rs | 5 +++++
arrow-array/src/array/list_view_array.rs | 5 +++++
arrow-array/src/array/map_array.rs | 5 +++++
arrow-array/src/array/run_array.rs | 4 ++++
arrow-array/src/array/struct_array.rs | 5 +++++
10 files changed, 48 insertions(+)
diff --git a/arrow-array/src/array/byte_array.rs
b/arrow-array/src/array/byte_array.rs
index a57abc5b1e..bec0caab10 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -461,6 +461,11 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_offsets.inner().inner().capacity();
sum += self.value_data.capacity();
diff --git a/arrow-array/src/array/byte_view_array.rs
b/arrow-array/src/array/byte_view_array.rs
index a35df0d200..81bb6a3855 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -583,6 +583,11 @@ impl<T: ByteViewType + ?Sized> Array for
GenericByteViewArray<T> {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.buffers.iter().map(|b| b.capacity()).sum::<usize>();
sum += self.views.inner().capacity();
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 6f27b383c0..1187e16769 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -866,6 +866,10 @@ impl<K: ArrowDictionaryKeyType, V: Sync> Array for
TypedDictionaryArray<'_, K, V
self.dictionary.logical_nulls()
}
+ fn logical_null_count(&self) -> usize {
+ self.dictionary.logical_null_count()
+ }
+
fn is_nullable(&self) -> bool {
self.dictionary.is_nullable()
}
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs
b/arrow-array/src/array/fixed_size_binary_array.rs
index 1371e81e26..8f1489ee4c 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -610,6 +610,11 @@ impl Array for FixedSizeBinaryArray {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_data.capacity();
if let Some(n) = &self.nulls {
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index 72855cef1f..00a3144a87 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -409,6 +409,11 @@ impl Array for FixedSizeListArray {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
if let Some(n) = self.nulls.as_ref() {
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index 06d5ee4e14..1fab0009f2 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -493,6 +493,11 @@ impl<OffsetSize: OffsetSizeTrait> Array for
GenericListArray<OffsetSize> {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
diff --git a/arrow-array/src/array/list_view_array.rs
b/arrow-array/src/array/list_view_array.rs
index bab686c3e4..4e949a6427 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -334,6 +334,11 @@ impl<OffsetSize: OffsetSizeTrait> Array for
GenericListViewArray<OffsetSize> {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().capacity();
diff --git a/arrow-array/src/array/map_array.rs
b/arrow-array/src/array/map_array.rs
index d40b8ee845..254437630a 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -380,6 +380,11 @@ impl Array for MapArray {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.entries.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
diff --git a/arrow-array/src/array/run_array.rs
b/arrow-array/src/array/run_array.rs
index 81c8cdcea4..dc4e6c96d9 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -596,6 +596,10 @@ impl<R: RunEndIndexType, V: Sync> Array for
TypedRunArray<'_, R, V> {
self.run_array.logical_nulls()
}
+ fn logical_null_count(&self) -> usize {
+ self.run_array.logical_null_count()
+ }
+
fn is_nullable(&self) -> bool {
self.run_array.is_nullable()
}
diff --git a/arrow-array/src/array/struct_array.rs
b/arrow-array/src/array/struct_array.rs
index d8bb1ace7a..41eb8235e5 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -378,6 +378,11 @@ impl Array for StructArray {
self.nulls.as_ref()
}
+ fn logical_null_count(&self) -> usize {
+ // More efficient than the default implementation
+ self.null_count()
+ }
+
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.fields.iter().map(|a|
a.get_buffer_memory_size()).sum();
if let Some(n) = self.nulls.as_ref() {