[GitHub] [arrow-rs] tustvold commented on a diff in pull request #4303: Speed up i256 division and remainder operations

via GitHub Tue, 30 May 2023 03:07:10 -0700


tustvold commented on code in PR #4303:
URL: https://github.com/apache/arrow-rs/pull/4303#discussion_r1210025966



##########
arrow-buffer/src/bigint.rs:
##########
@@ -22,22 +22,33 @@ use std::num::ParseIntError;
 use std::ops::{BitAnd, BitOr, BitXor, Neg, Shl, Shr};
 use std::str::FromStr;
 
-/// An opaque error similar to [`std::num::ParseIntError`]
+/// [`i256`] operations return this error type.
 #[derive(Debug)]
-pub struct ParseI256Error {}
+pub enum I256Error {

Review Comment:
   This is a breaking change (which is fine)



##########
arrow-buffer/src/bigint.rs:
##########
@@ -396,42 +407,93 @@ impl i256 {
             .then_some(Self { low, high })
     }
 
+    /// Return the least number of bits needed to represent the number
+    #[inline]
+    fn bits_required(&self) -> usize {
+        let arr = self.to_le_bytes();
+        let iter = arr.iter().rev().take(32 - 1);
+        if self.is_negative() {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MAX).count();
+            (8 * (32 - ctr)) + 1 - (!arr[32 - ctr - 1]).leading_zeros() as 
usize
+        } else {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MIN).count();
+            (8 * (32 - ctr)) + 1 - arr[32 - ctr - 1].leading_zeros() as usize
+        }
+    }
+
+    /// divmod like operation, returns (quotient, remainder)
+    #[inline]
+    fn div_rem(self, other: Self) -> Result<(Self, Self), I256Error> {
+        if other == Self::ZERO {
+            return Err(I256Error::DivideByZero);
+        }
+        if other.is_negative() && self == Self::MIN && other == 
Self::ONE.wrapping_neg() {
+            return Err(I256Error::DivideOverflow);
+        }
+
+        if self == Self::MIN || other == Self::MIN {
+            let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+            let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+            let d = i256::from_bigint_with_overflow(&l / &r).0;
+            let r = i256::from_bigint_with_overflow(&l % &r).0;
+            return Ok((d, r));
+        }
+
+        let mut me = self.checked_abs().unwrap();
+        let mut you = other.checked_abs().unwrap();
+        let mut ret = [0u8; 32];
+        if me < you {
+            return Ok((Self::from_le_bytes(ret), self));
+        }
+
+        let shift = me.bits_required() - you.bits_required();

Review Comment:
   I think it would help readers of this code if you could link to an 
explanation of what this algorithm is doing - 
https://en.wikipedia.org/wiki/Division_algorithm has a fairly comprehensive list



##########
arrow-buffer/src/bigint.rs:
##########
@@ -396,42 +407,93 @@ impl i256 {
             .then_some(Self { low, high })
     }
 
+    /// Return the least number of bits needed to represent the number
+    #[inline]
+    fn bits_required(&self) -> usize {
+        let arr = self.to_le_bytes();
+        let iter = arr.iter().rev().take(32 - 1);
+        if self.is_negative() {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MAX).count();
+            (8 * (32 - ctr)) + 1 - (!arr[32 - ctr - 1]).leading_zeros() as 
usize
+        } else {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MIN).count();
+            (8 * (32 - ctr)) + 1 - arr[32 - ctr - 1].leading_zeros() as usize
+        }
+    }
+
+    /// divmod like operation, returns (quotient, remainder)
+    #[inline]
+    fn div_rem(self, other: Self) -> Result<(Self, Self), I256Error> {
+        if other == Self::ZERO {
+            return Err(I256Error::DivideByZero);
+        }
+        if other.is_negative() && self == Self::MIN && other == 
Self::ONE.wrapping_neg() {
+            return Err(I256Error::DivideOverflow);
+        }
+
+        if self == Self::MIN || other == Self::MIN {
+            let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+            let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+            let d = i256::from_bigint_with_overflow(&l / &r).0;
+            let r = i256::from_bigint_with_overflow(&l % &r).0;
+            return Ok((d, r));
+        }
+
+        let mut me = self.checked_abs().unwrap();
+        let mut you = other.checked_abs().unwrap();
+        let mut ret = [0u8; 32];
+        if me < you {
+            return Ok((Self::from_le_bytes(ret), self));
+        }
+

Review Comment:
   I wonder if at this point we could fallback to 128-bit division if both the 
high components are zero?



##########
arrow-buffer/src/bigint.rs:
##########
@@ -396,42 +407,93 @@ impl i256 {
             .then_some(Self { low, high })
     }
 
+    /// Return the least number of bits needed to represent the number
+    #[inline]
+    fn bits_required(&self) -> usize {
+        let arr = self.to_le_bytes();

Review Comment:
   I suspect the performance of this could be improved by using `leading_zeros` 
or `leading_ones` on the 128-bit representations 



##########
arrow-buffer/src/bigint.rs:
##########
@@ -396,42 +407,93 @@ impl i256 {
             .then_some(Self { low, high })
     }
 
+    /// Return the least number of bits needed to represent the number
+    #[inline]
+    fn bits_required(&self) -> usize {
+        let arr = self.to_le_bytes();
+        let iter = arr.iter().rev().take(32 - 1);
+        if self.is_negative() {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MAX).count();
+            (8 * (32 - ctr)) + 1 - (!arr[32 - ctr - 1]).leading_zeros() as 
usize
+        } else {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MIN).count();
+            (8 * (32 - ctr)) + 1 - arr[32 - ctr - 1].leading_zeros() as usize
+        }
+    }
+
+    /// divmod like operation, returns (quotient, remainder)
+    #[inline]
+    fn div_rem(self, other: Self) -> Result<(Self, Self), I256Error> {
+        if other == Self::ZERO {
+            return Err(I256Error::DivideByZero);
+        }
+        if other.is_negative() && self == Self::MIN && other == 
Self::ONE.wrapping_neg() {
+            return Err(I256Error::DivideOverflow);
+        }
+
+        if self == Self::MIN || other == Self::MIN {
+            let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+            let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+            let d = i256::from_bigint_with_overflow(&l / &r).0;

Review Comment:
   This appears to discard overflow - which will cause incorrect behaviour for 
checked_div?



##########
arrow-buffer/src/bigint.rs:
##########
@@ -1078,4 +1141,113 @@ mod tests {
             assert_eq!(i256::from_string(case), expected)
         }
     }
+
+    /// Returns fixed seedable RNG
+    fn seedable_rng() -> StdRng {
+        StdRng::seed_from_u64(42)
+    }
+
+    fn create_i256_vec(size: usize) -> Vec<i256> {
+        let mut rng = seedable_rng();
+
+        (0..size)
+            .map(|_| i256::from_i128(rng.gen::<i128>()))
+            .collect()
+    }
+
+    #[test]
+    fn test_i256_checked_div() {

Review Comment:
   Perhaps we could fold these tests into test_ops. This would have the benefit 
of being less code, and also better coverage



##########
arrow-buffer/src/bigint.rs:
##########
@@ -396,42 +407,93 @@ impl i256 {
             .then_some(Self { low, high })
     }
 
+    /// Return the least number of bits needed to represent the number
+    #[inline]
+    fn bits_required(&self) -> usize {
+        let arr = self.to_le_bytes();
+        let iter = arr.iter().rev().take(32 - 1);
+        if self.is_negative() {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MAX).count();
+            (8 * (32 - ctr)) + 1 - (!arr[32 - ctr - 1]).leading_zeros() as 
usize
+        } else {
+            let ctr = iter.take_while(|&&b| b == ::core::u8::MIN).count();
+            (8 * (32 - ctr)) + 1 - arr[32 - ctr - 1].leading_zeros() as usize
+        }
+    }
+
+    /// divmod like operation, returns (quotient, remainder)
+    #[inline]
+    fn div_rem(self, other: Self) -> Result<(Self, Self), I256Error> {
+        if other == Self::ZERO {
+            return Err(I256Error::DivideByZero);
+        }
+        if other.is_negative() && self == Self::MIN && other == 
Self::ONE.wrapping_neg() {
+            return Err(I256Error::DivideOverflow);
+        }
+
+        if self == Self::MIN || other == Self::MIN {
+            let l = BigInt::from_signed_bytes_le(&self.to_le_bytes());
+            let r = BigInt::from_signed_bytes_le(&other.to_le_bytes());
+            let d = i256::from_bigint_with_overflow(&l / &r).0;
+            let r = i256::from_bigint_with_overflow(&l % &r).0;
+            return Ok((d, r));
+        }
+
+        let mut me = self.checked_abs().unwrap();
+        let mut you = other.checked_abs().unwrap();
+        let mut ret = [0u8; 32];
+        if me < you {
+            return Ok((Self::from_le_bytes(ret), self));
+        }
+
+        let shift = me.bits_required() - you.bits_required();
+        you = you.shl(shift as u8);
+        for i in (0..=shift).rev() {
+            if me >= you {
+                ret[i / 8] |= 1 << (i % 8);

Review Comment:
   Similar to the above, per-byte arithmetic is likely significantly less 
efficient than arithmetic on 128-bit quantities.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-rs] tustvold commented on a diff in pull request #4303: Speed up i256 division and remainder operations

Reply via email to