alamb commented on code in PR #9910:
URL: https://github.com/apache/arrow-rs/pull/9910#discussion_r3190939307


##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -265,30 +237,46 @@ impl FixedSizeBinaryArray {
     /// Caller is responsible for ensuring that the index is within the bounds
     /// of the array
     pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
-        let offset = i + self.offset();
-        let pos = self.value_offset_at(offset);
+        let position = i * self.value_size;
         unsafe {
-            std::slice::from_raw_parts(
-                self.value_data.as_ptr().offset(pos as isize),
-                (self.value_offset_at(offset + 1) - pos) as usize,
-            )
+            std::slice::from_raw_parts(self.value_data.as_ptr().add(position), 
self.value_size)
         }
     }
 
     /// Returns the offset for the element at index `i`.
     ///
     /// Note this doesn't do any bound checking, for performance reason.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the computed byte offset exceeds `i32::MAX`.
+    #[deprecated(since = "59.0.0", note = "Use i * value_size() instead")]
     #[inline]
     pub fn value_offset(&self, i: usize) -> i32 {
-        self.value_offset_at(self.offset() + i)
+        self.value_length() * i as i32
     }
 
     /// Returns the length for an element.
     ///
     /// All elements have the same length as the array is a fixed size.
+    ///
+    /// Returns an `i32` to be compatible with the Arrow spec.
+    ///
+    /// Use [`Self::value_size`] to return a `usize`.
     #[inline]
     pub fn value_length(&self) -> i32 {
-        self.value_length
+        // This is safe: constructor validated that value_size was a valid i32
+        self.value_size as i32
+    }
+
+    /// Return the length for an element, as a usize.
+    ///
+    /// All elements have the same length as the array is a fixed size.
+    ///
+    /// Note: This value will always fit, without overflow, into an i32
+    #[inline]
+    pub fn value_size(&self) -> usize {

Review Comment:
   new public API so callers can get zero runtime cost type checked access to 
value_size as a `usize`



##########
arrow-array/src/builder/fixed_size_binary_builder.rs:
##########
@@ -206,11 +206,11 @@ mod tests {
         assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
         assert_eq!(6, array.len());
         assert_eq!(3, array.null_count());
-        assert_eq!(10, array.value_offset(2));

Review Comment:
   value_offset is deprecated. I think the offset is less interesting to 
validate compared to the actual value, so I updated the tets



##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -1122,26 +1099,6 @@ mod tests {
         array.value(4);
     }
 
-    #[test]
-    fn test_validate_lengths_allows_empty_array() {
-        FixedSizeBinaryArray::validate_lengths(1024, 0).unwrap();

Review Comment:
   this (internal) API was removed so I also removed the tests
   
   Note this was the API that was added (briefly) in 
   - https://github.com/apache/arrow-rs/pull/9872



##########
arrow-array/src/array/fixed_size_binary_array.rs:
##########
@@ -311,14 +299,16 @@ impl FixedSizeBinaryArray {
             offset.saturating_add(len) <= self.len,
             "the length + offset of the sliced FixedSizeBinaryArray cannot 
exceed the existing length"
         );
-
-        let size = self.value_length as usize;
+        let offset_bytes = offset

Review Comment:
   here is an additonal new overflow check



##########
arrow-string/src/substring.rs:
##########
@@ -322,31 +325,37 @@ where
 
 fn fixed_size_binary_substring(
     array: &FixedSizeBinaryArray,
-    old_len: i32,
-    start: i32,
-    length: Option<i32>,
+    old_len: usize,

Review Comment:
   here is another example of code that was rife with `as i32` / `as usize` and 
some 🙏  that it didn't overflow



##########
arrow-string/src/substring.rs:
##########
@@ -322,31 +325,37 @@ where
 
 fn fixed_size_binary_substring(
     array: &FixedSizeBinaryArray,
-    old_len: i32,
-    start: i32,
-    length: Option<i32>,
+    old_len: usize,
+    start: i64,
+    length: Option<u64>,
 ) -> Result<ArrayRef, ArrowError> {
-    let new_start = if start >= 0 {
-        start.min(old_len)
-    } else {
-        (old_len + start).max(0)
+    let new_start = match start.cmp(&0) {
+        Ordering::Greater => 
usize::try_from(start).unwrap_or(usize::MAX).min(old_len),
+        Ordering::Equal => 0,
+        Ordering::Less => {
+            let offset = 
usize::try_from(start.unsigned_abs()).unwrap_or(usize::MAX);
+            old_len.saturating_sub(offset)
+        }
     };
+
     let new_len = match length {
-        Some(len) => len.min(old_len - new_start),
+        Some(len) => usize::try_from(len)
+            .unwrap_or(usize::MAX)
+            .min(old_len - new_start),
         None => old_len - new_start,
     };
 
     // build value buffer
     let num_of_elements = array.len();
     let data = array.value_data();
-    let mut new_values = MutableBuffer::new(num_of_elements * (new_len as 
usize));
+    let capacity = num_of_elements
+        .checked_mul(new_len)
+        .expect("capacity overflow");
+    let mut new_values = MutableBuffer::new(capacity);
     (0..num_of_elements)
         .map(|idx| {
-            let offset = array.value_offset(idx);
-            (
-                (offset + new_start) as usize,

Review Comment:
   other examples of `as usize` that are removed now



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to