wiedld commented on code in PR #6303:
URL: https://github.com/apache/arrow-rs/pull/6303#discussion_r1776018569
##########
arrow-array/src/array/union_array.rs:
##########
@@ -479,20 +739,139 @@ impl Array for UnionArray {
None
}
+ fn logical_nulls(&self) -> Option<NullBuffer> {
+ let fields = match self.data_type() {
+ DataType::Union(fields, _) => fields,
+ _ => unreachable!(),
+ };
+
+ if fields.len() <= 1 {
+ return self
+ .fields
+ .iter()
+ .flatten()
+ .map(Array::logical_nulls)
+ .next()
+ .flatten();
+ }
+
+ let logical_nulls = fields
+ .iter()
+ .filter_map(|(type_id, _)| Some((type_id,
self.child(type_id).logical_nulls()?)))
+ .filter(|(_, nulls)| nulls.null_count() > 0)
+ .collect::<Vec<_>>();
+
+ if logical_nulls.is_empty() {
+ return None;
+ }
+
+ let fully_null_count = logical_nulls
+ .iter()
+ .filter(|(_, nulls)| nulls.null_count() == nulls.len())
+ .count();
+
+ if fully_null_count == fields.len() {
+ if let Some((_, exactly_sized)) = logical_nulls
+ .iter()
+ .find(|(_, nulls)| nulls.len() == self.len())
+ {
+ return Some(exactly_sized.clone());
+ }
+
+ if let Some((_, bigger)) = logical_nulls
+ .iter()
+ .find(|(_, nulls)| nulls.len() > self.len())
+ {
+ return Some(bigger.slice(0, self.len()));
+ }
+
+ return Some(NullBuffer::new_null(self.len()));
+ }
+
+ let boolean_buffer = match &self.offsets {
+ Some(_) => self.gather_nulls(logical_nulls),
+ None => {
+ enum SparseStrategy {
+ Gather,
+ AllNullsSkipOne,
+ MixedSkipWithoutNulls,
+ MixedSkipFullyNull,
Review Comment:
Could you move this enum elsewhere and add a doc comment explaining what the
options mean, since the names are not self-explanatory?
For example, `AllNullsSkipOne` actually means "all fields have some (not
all) nulls", and the `SkipOne` part refers to how the child arrays are iterated.
Having the enum explained up front helps speed up comprehension. 🙏🏼
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]