alamb commented on code in PR #1589:
URL: https://github.com/apache/arrow-rs/pull/1589#discussion_r860755500


##########
arrow/src/array/builder.rs:
##########
@@ -1894,23 +1894,19 @@ struct FieldData {
     values_buffer: Option<MutableBuffer>,
     ///  The number of array slots represented by the buffer
     slots: usize,
-    /// A builder for the bitmap if required (for Sparse Unions)
-    bitmap_builder: Option<BooleanBufferBuilder>,
+    /// A builder for the null bitmap

Review Comment:
   👍 



##########
arrow/src/array/builder.rs:
##########
@@ -2070,39 +2061,13 @@ impl UnionBuilder {
             fields: HashMap::default(),
             type_id_builder: Int8BufferBuilder::new(capacity),
             value_offset_builder: None,
-            bitmap_builder: None,
         }
     }
 
     /// Appends a null to this builder.
     #[inline]
-    pub fn append_null(&mut self) -> Result<()> {
-        if self.bitmap_builder.is_none() {
-            let mut builder = BooleanBufferBuilder::new(self.len + 1);
-            for _ in 0..self.len {
-                builder.append(true);
-            }
-            self.bitmap_builder = Some(builder)
-        }
-        self.bitmap_builder
-            .as_mut()
-            .expect("Cannot be None")
-            .append(false);
-
-        self.type_id_builder.append(i8::default());
-
-        match &mut self.value_offset_builder {
-            // Handle dense union
-            Some(value_offset_builder) => 
value_offset_builder.append(i32::default()),
-            // Handle sparse union
-            None => {
-                for (_, fd) in self.fields.iter_mut() {
-                    fd.append_null_dynamic()?;
-                }
-            }
-        };
-        self.len += 1;
-        Ok(())
+    pub fn append_null<T: ArrowPrimitiveType>(&mut self, type_name: &str) -> 
Result<()> {

Review Comment:
   I realize from an implementation perspective why you chose to require 
appending a null by field name, but it seems kind of a messy API -- what do you 
think about potentially appending a null to the first child or something by 
default?
   
   Perhaps something to think about for a follow on PR



##########
arrow/src/array/array_union.rs:
##########
@@ -560,7 +554,7 @@ mod tests {
         builder.append::<Int32Type>("a", 1).unwrap();
         builder.append::<Int64Type>("c", 3).unwrap();
         builder.append::<Int32Type>("a", 10).unwrap();
-        builder.append_null().unwrap();
+        builder.append_null::<Int32Type>("a").unwrap();

Review Comment:
   it is strange to require a field name to append nulls -- I left a comment on 
how to improve things perhaps in the future



##########
arrow/src/array/equal/fixed_binary.rs:
##########
@@ -39,8 +36,8 @@ pub(super) fn fixed_binary_equal(
     let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..];
     let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..];
 
-    let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
-    let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
+    let lhs_null_count = count_nulls(lhs.null_buffer(), lhs_start + 
lhs.offset(), len);

Review Comment:
   it makes a lot more sense to look at the buffer's nulls directly



##########
arrow/src/compute/kernels/filter.rs:
##########
@@ -1707,17 +1738,17 @@ mod tests {
         let mut builder = UnionBuilder::new_sparse(4);
         builder.append::<Int32Type>("A", 1).unwrap();
         builder.append::<Float64Type>("B", 3.2).unwrap();
-        builder.append_null().unwrap();
+        builder.append_null::<Float64Type>("B").unwrap();
         builder.append::<Int32Type>("A", 34).unwrap();
         let array = builder.build().unwrap();
 
         let filter_array = BooleanArray::from(vec![true, false, true, false]);
         let c = filter(&array, &filter_array).unwrap();
         let filtered = c.as_any().downcast_ref::<UnionArray>().unwrap();
 
-        let mut builder = UnionBuilder::new_dense(1);
+        let mut builder = UnionBuilder::new_sparse(2);

Review Comment:
   I assume this was changed because the name of the test is 
`test_filter_union_array_sparse_with_nulls` but we were using a `dense` array 
previously



##########
arrow/src/array/data.rs:
##########
@@ -621,6 +621,13 @@ impl ArrayData {
         // Check that the data layout conforms to the spec
         let layout = layout(&self.data_type);
 
+        if !layout.can_contain_null_mask && self.null_bitmap.is_some() {

Review Comment:
   👍 



##########
arrow/src/array/array_union.rs:
##########
@@ -522,29 +516,29 @@ mod tests {
             match i {
                 0 => {
                     let slot = 
slot.as_any().downcast_ref::<Int32Array>().unwrap();
-                    assert!(!union.is_null(i));
+                    assert!(!slot.is_null(0));

Review Comment:
   https://arrow.apache.org/docs/format/Columnar.html#union-layout
   
   Says
   
   > Unlike other data types, unions do not have their own validity bitmap. 
Instead, the nullness of each slot is determined exclusively by the child 
arrays which are composed to create the union.
   
   Which seems consistent 👍 
   
   However, it seems like `UnionArray` doesn't override `is_null` to take this 
into account.  Filed https://github.com/apache/arrow-rs/issues/1625 to track



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to