This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4ed4053  ARROW-3549: [Rust] Replace i64 with usize for some bit utility functions
4ed4053 is described below

commit 4ed4053721e421fe201b8eb2f9f96ba267c94bde
Author: Chao Sun <[email protected]>
AuthorDate: Thu Oct 18 11:26:13 2018 +0200

    ARROW-3549: [Rust] Replace i64 with usize for some bit utility functions
    
    We currently use `i64` in some bit utility functions. This can be replaced by `usize` to avoid unnecessary type conversions and invalid arguments.
    
    Author: Chao Sun <[email protected]>
    
    Closes #2784 from sunchao/ARROW-3549 and squashes the following commits:
    
    55f78fb6 <Chao Sun> ARROW-3549:  Replace i64 with usize for some bit utility functions
---
 rust/src/array.rs         | 14 +++++++-------
 rust/src/array_data.rs    |  2 +-
 rust/src/bitmap.rs        |  3 ++-
 rust/src/util/bit_util.rs | 26 +++++++++++++-------------
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/rust/src/array.rs b/rust/src/array.rs
index 9eca9eb..963a235 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -220,7 +220,7 @@ macro_rules! def_primitive_array {
                     let null_slice = null_buf.data_mut();
                     for (i, v) in data.iter().enumerate() {
                         if let Some(n) = v {
-                            bit_util::set_bit(null_slice, i as i64);
+                            bit_util::set_bit(null_slice, i);
                             // unwrap() in the following should be safe here since we've
                             // made sure enough space is allocated for the values.
                             val_buf.write(&n.to_byte_slice()).unwrap();
@@ -306,10 +306,10 @@ impl PrimitiveArray<bool> {
     }
 
     /// Returns the boolean value at index `i`.
-    ///
-    /// Note this doesn't do any bound checking, for performance reason.
     pub fn value(&self, i: i64) -> bool {
-        unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, i + self.offset()) }
+        let offset = i + self.offset();
+        assert!(offset < self.data.len());
+        unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, offset as usize) }
     }
 }
 
@@ -322,7 +322,7 @@ impl From<Vec<bool>> for PrimitiveArray<bool> {
             let mut_slice = mut_buf.data_mut();
             for (i, b) in data.iter().enumerate() {
                 if *b {
-                    bit_util::set_bit(mut_slice, i as i64);
+                    bit_util::set_bit(mut_slice, i);
                 }
             }
         }
@@ -347,9 +347,9 @@ impl From<Vec<Option<bool>>> for PrimitiveArray<bool> {
 
             for (i, v) in data.iter().enumerate() {
                 if let Some(b) = v {
-                    bit_util::set_bit(null_slice, i as i64);
+                    bit_util::set_bit(null_slice, i);
                     if *b {
-                        bit_util::set_bit(val_slice, i as i64);
+                        bit_util::set_bit(val_slice, i);
                     }
                 }
             }
diff --git a/rust/src/array_data.rs b/rust/src/array_data.rs
index 180d3f9..055c8d9 100644
--- a/rust/src/array_data.rs
+++ b/rust/src/array_data.rs
@@ -68,7 +68,7 @@ impl ArrayData {
     ) -> Self {
         if null_count < 0 {
             null_count = if let Some(ref buf) = null_bit_buffer {
-                len - bit_util::count_set_bits_offset(buf.data(), offset)
+                len - bit_util::count_set_bits_offset(buf.data(), offset as usize)
             } else {
                 0
             };
diff --git a/rust/src/bitmap.rs b/rust/src/bitmap.rs
index 2fdcf87..6cec4d5 100644
--- a/rust/src/bitmap.rs
+++ b/rust/src/bitmap.rs
@@ -46,7 +46,8 @@ impl Bitmap {
     }
 
     pub fn is_set(&self, i: i64) -> bool {
-        bit_util::get_bit(self.bits.data(), i)
+        assert!(i < (self.bits.len() << 3) as i64);
+        unsafe { bit_util::get_bit_raw(self.bits.raw_data(), i as usize) }
     }
 }
 
diff --git a/rust/src/util/bit_util.rs b/rust/src/util/bit_util.rs
index 5e4c35c..e652281 100644
--- a/rust/src/util/bit_util.rs
+++ b/rust/src/util/bit_util.rs
@@ -43,8 +43,8 @@ fn round_upto_power_of_2(num: i64, factor: i64) -> i64 {
 
 /// Returns whether bit at position `i` in `data` is set or not
 #[inline]
-pub fn get_bit(data: &[u8], i: i64) -> bool {
-    (data[(i >> 3) as usize] & BIT_MASK[(i & 7) as usize]) != 0
+pub fn get_bit(data: &[u8], i: usize) -> bool {
+    (data[i >> 3] & BIT_MASK[i & 7]) != 0
 }
 
 /// Returns whether bit at position `i` in `data` is set or not.
@@ -52,14 +52,14 @@ pub fn get_bit(data: &[u8], i: i64) -> bool {
 /// Note this doesn't do any bound checking, for performance reason. The caller is
 /// responsible to guarantee that `i` is within bounds.
 #[inline]
-pub unsafe fn get_bit_raw(data: *const u8, i: i64) -> bool {
-    (*data.offset((i >> 3) as isize) & BIT_MASK[(i & 7) as usize]) != 0
+pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
+    (*data.offset((i >> 3) as isize) & BIT_MASK[i & 7]) != 0
 }
 
 /// Sets bit at position `i` for `data`
 #[inline]
-pub fn set_bit(data: &mut [u8], i: i64) {
-    data[(i >> 3) as usize] |= BIT_MASK[(i & 7) as usize]
+pub fn set_bit(data: &mut [u8], i: usize) {
+    data[i >> 3] |= BIT_MASK[i & 7]
 }
 
 /// Returns the number of 1-bits in `data`
@@ -74,8 +74,8 @@ pub fn count_set_bits(data: &[u8]) -> i64 {
 
 /// Returns the number of 1-bits in `data`, starting from `offset`.
 #[inline]
-pub fn count_set_bits_offset(data: &[u8], offset: i64) -> i64 {
-    debug_assert!(offset <= (data.len() * 8) as i64);
+pub fn count_set_bits_offset(data: &[u8], offset: usize) -> i64 {
+    debug_assert!(offset <= (data.len() << 3));
 
     let start_byte_pos = (offset >> 3) as usize;
     let start_bit_pos = offset & 7;
@@ -86,7 +86,7 @@ pub fn count_set_bits_offset(data: &[u8], offset: i64) -> i64 {
         let mut result = 0;
         result += count_set_bits(&data[start_byte_pos + 1..]);
         for i in start_bit_pos..8 {
-            if get_bit(&data[start_byte_pos..start_byte_pos + 1], i as i64) {
+            if get_bit(&data[start_byte_pos..start_byte_pos + 1], i as usize) {
                 result += 1;
             }
         }
@@ -158,14 +158,14 @@ mod tests {
             let b = rng.gen_bool(0.5);
             expected.push(b);
             if b {
-                set_bit(&mut buf[..], i as i64)
+                set_bit(&mut buf[..], i)
             }
         }
 
         let raw_ptr = buf.as_ptr();
         for (i, b) in expected.iter().enumerate() {
             unsafe {
-                assert_eq!(*b, get_bit_raw(raw_ptr, i as i64));
+                assert_eq!(*b, get_bit_raw(raw_ptr, i));
             }
         }
     }
@@ -192,10 +192,10 @@ mod tests {
         for _ in 0..NUM_SETS {
             let offset = rng.gen_range(0, 8 * NUM_BYTES);
             v.insert(offset);
-            set_bit(&mut buffer[..], offset as i64);
+            set_bit(&mut buffer[..], offset);
         }
         for i in 0..NUM_BYTES * 8 {
-            assert_eq!(v.contains(&i), get_bit(&buffer[..], i as i64));
+            assert_eq!(v.contains(&i), get_bit(&buffer[..], i));
         }
     }
 

Reply via email to