bkietz commented on code in PR #4585:
URL: https://github.com/apache/arrow-rs/pull/4585#discussion_r1316442549


##########
arrow-array/src/array/byte_view_array.rs:
##########
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::array::print_long_array;
+use crate::builder::GenericByteViewBuilder;
+use crate::iterator::ArrayIter;
+use crate::types::bytes::ByteArrayNativeType;
+use crate::types::{BinaryViewType, ByteViewType, StringViewType};
+use crate::{Array, ArrayAccessor, ArrayRef};
+use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
+use arrow_data::view::View;
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType};
+use std::any::Any;
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+/// An array of variable length byte view arrays
+pub struct GenericByteViewArray<T: ByteViewType> {
+    data_type: DataType,
+    views: ScalarBuffer<u128>,

Review Comment:
   Does `u128` carry a 128 bit alignment requirement? If so, this would appear 
to be unable to consume arrays whose views are aligned on a 64 bit boundary but 
happen *not* to be aligned on a 128 bit boundary. Views have only a 64 bit 
alignment guarantee. Could this be replaced with
   
   ```suggestion
       views: ScalarBuffer<u64>,
   ```
   ?
   
   Alternatively, perhaps this shouldn't be a `ScalarBuffer` at all? Packing 
the fields of views into bits of a `u128` seems slightly counter to the intent 
of the `ArrowNativeType` trait.



##########
arrow-data/src/equal/binary_view.rs:
##########
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::view::View;
+use crate::ArrayData;
+
+pub(super) fn binary_view_equal(
+    lhs: &ArrayData,
+    rhs: &ArrayData,
+    lhs_start: usize,
+    rhs_start: usize,
+    len: usize,
+) -> bool {
+    let lhs_views = &lhs.buffer::<u128>(0)[lhs_start..lhs_start + len];
+    let lhs_b = lhs.buffers();
+    let rhs_views = &rhs.buffer::<u128>(0)[rhs_start..rhs_start + len];
+    let rhs_b = rhs.buffers();
+
+    for (idx, (l, r)) in lhs_views.iter().zip(rhs_views).enumerate() {
+        // Only checking one null mask here because by the time the control 
flow reaches
+        // this point, the equality of the two masks would have already been 
verified.
+        if lhs.is_null(idx) {
+            continue;
+        }
+
+        let l_len = *l as u32;
+        let r_len = *r as u32;
+        if l_len != r_len {
+            return false;
+        } else if l_len <= 12 {
+            // Inline storage
+            if l != r {
+                return false;
+            }
+        } else {
+            let l_view = View::from(*l);
+            let r_view = View::from(*r);
+            let l_b = &lhs_b[(l_view.buffer_index as usize) + 1];
+            let r_b = &rhs_b[(r_view.buffer_index as usize) + 1];
+
+            let l_o = l_view.offset as usize;
+            let r_o = r_view.offset as usize;
+            let len = l_len as usize;
+            if l_b[l_o..l_o + len] != r_b[r_o..r_o + len] {
+                return false;
+            }
+        }
+    }
+    true

Review Comment:
   For equality comparison, we can compare the length and prefix of the views 
simultaneously:
   ```suggestion
       let lhs_b = lhs.buffers()[1..];
       let rhs_views = &rhs.buffer::<u128>(0)[rhs_start..rhs_start + len];
       let rhs_b = rhs.buffers()[1..];
   
       for (idx, (l, r)) in lhs_views.iter().zip(rhs_views).enumerate() {
           // Only checking one null mask here because by the time the control 
flow reaches
           // this point, the equality of the two masks would have already been 
verified.
           if lhs.is_null(idx) {
               continue;
           }
   
           let l_len_prefix = *l as u64;
           let r_len_prefix = *r as u64;
           if l_len_prefix != r_len_prefix {
               return false;
           }
   
           let len = *l as u32;
           if len <= 12 {
               if (*l >> 64) as u64 != (*r >> 64) as u64 {
                   return false;
               }
               continue;
           }
   
           let l_view = View::from(*l);
           let r_view = View::from(*r);
   
           let l_b = &lhs_b[l_view.buffer_index as usize];
           let r_b = &rhs_b[r_view.buffer_index as usize];
   
           // prefixes are already known to be equal; skip checking them
           let len = len as usize - 4;
           let l_o = l_view.offset as usize + 4;
           let r_o = r_view.offset as usize + 4;
   
           if l_b[l_o..l_o + len] != r_b[r_o..r_o + len] {
               return false;
           }
       }
       true
   ```



##########
arrow-data/src/transform/mod.rs:
##########
@@ -138,26 +106,31 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: 
bool) -> ExtendNullBits
 pub struct MutableArrayData<'a> {
     #[allow(dead_code)]
     arrays: Vec<&'a ArrayData>,
-    // The attributes in [_MutableArrayData] cannot be in [MutableArrayData] 
due to
-    // mutability invariants (interior mutability):
-    // [MutableArrayData] contains a function that can only mutate 
[_MutableArrayData], not
-    // [MutableArrayData] itself
+    /// The attributes in [_MutableArrayData] cannot be in [MutableArrayData] 
due to
+    /// mutability invariants (interior mutability):
+    /// [MutableArrayData] contains a function that can only mutate 
[_MutableArrayData], not
+    /// [MutableArrayData] itself
     data: _MutableArrayData<'a>,
 
-    // the child data of the `Array` in Dictionary arrays.
-    // This is not stored in `MutableArrayData` because these values constant 
and only needed
-    // at the end, when freezing [_MutableArrayData].
+    /// the child data of the `Array` in Dictionary arrays.
+    /// This is not stored in `MutableArrayData` because these values constant 
and only needed
+    /// at the end, when freezing [_MutableArrayData].
     dictionary: Option<ArrayData>,
 
-    // function used to extend values from arrays. This function's lifetime is 
bound to the array
-    // because it reads values from it.
+    /// View data buffers
+    /// This is not stored in `MutableArrayData` because these values constant 
and only needed
+    /// at the end, when freezing [_MutableArrayData].
+    view_buffers: Vec<Buffer>,

Review Comment:
   I'd recommend renaming this to avoid the suggestion that these buffers hold 
views
   ```suggestion
       /// Variadic data buffers referenced by views
       /// This is not stored in `MutableArrayData` because these values 
constant and only needed
       /// at the end, when freezing [_MutableArrayData].
       variadic_character_buffers: Vec<Buffer>,
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to