(datafusion) branch main updated: fix: LCM panicked due to overflow (#11131)

jonah Wed, 26 Jun 2024 20:15:28 -0700

This is an automated email from the ASF dual-hosted git repository.

jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 2d1e8505ea fix: LCM panicked due to overflow (#11131)
2d1e8505ea is described below

commit 2d1e8505eacc77137425cc0fed2076bdb6af91a0
Author: Jonah Gao <[email protected]>
AuthorDate: Thu Jun 27 11:15:20 2024 +0800

    fix: LCM panicked due to overflow (#11131)
    
    * fix: LCM panicked due to overflow
    
    * add test
    
    * fix comment
---
 datafusion/functions/src/math/gcd.rs          |  2 +-
 datafusion/functions/src/math/lcm.rs          | 28 +++++++------
 datafusion/sqllogictest/test_files/math.slt   | 57 +++++++++++++++++++++++++++
 datafusion/sqllogictest/test_files/scalar.slt | 35 ----------------
 4 files changed, 74 insertions(+), 48 deletions(-)

diff --git a/datafusion/functions/src/math/gcd.rs 
b/datafusion/functions/src/math/gcd.rs
index ace4937d6d..95a559c5d1 100644
--- a/datafusion/functions/src/math/gcd.rs
+++ b/datafusion/functions/src/math/gcd.rs
@@ -93,7 +93,7 @@ fn gcd(args: &[ArrayRef]) -> Result<ArrayRef> {
 }
 
 /// Computes gcd of two unsigned integers using Binary GCD algorithm.
-fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 {
+pub(super) fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 {
     if a == 0 {
         return b;
     }
diff --git a/datafusion/functions/src/math/lcm.rs 
b/datafusion/functions/src/math/lcm.rs
index d58bc5ef0b..21c201657e 100644
--- a/datafusion/functions/src/math/lcm.rs
+++ b/datafusion/functions/src/math/lcm.rs
@@ -27,7 +27,7 @@ use datafusion_common::{arrow_datafusion_err, exec_err, 
DataFusionError, Result}
 use datafusion_expr::ColumnarValue;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 
-use crate::math::gcd::compute_gcd;
+use super::gcd::unsigned_gcd;
 use crate::utils::make_scalar_function;
 
 #[derive(Debug)]
@@ -75,19 +75,23 @@ impl ScalarUDFImpl for LcmFunc {
 /// Lcm SQL function
 fn lcm(args: &[ArrayRef]) -> Result<ArrayRef> {
     let compute_lcm = |x: i64, y: i64| {
-        let a = x.wrapping_abs();
-        let b = y.wrapping_abs();
-
-        if a == 0 || b == 0 {
+        if x == 0 || y == 0 {
             return Ok(0);
         }
-        match compute_gcd(a, b) {
-            Ok(gcd) => Ok(a / gcd * b),
-            // change the error string to use LCM instead of GCD
-            Err(_) => 
Err(arrow_datafusion_err!(ArrowError::ComputeError(format!(
-                "Signed integer overflow in LCM({x}, {y})"
-            )))),
-        }
+
+        // lcm(x, y) = |x| * |y| / gcd(|x|, |y|)
+        let a = x.unsigned_abs();
+        let b = y.unsigned_abs();
+        let gcd = unsigned_gcd(a, b);
+        // gcd is not zero since both a and b are not zero, so the division is 
safe.
+        (a / gcd)
+            .checked_mul(b)
+            .and_then(|v| i64::try_from(v).ok())
+            .ok_or_else(|| {
+                arrow_datafusion_err!(ArrowError::ComputeError(format!(
+                    "Signed integer overflow in LCM({x}, {y})"
+                )))
+            })
     };
 
     match args[0].data_type() {
diff --git a/datafusion/sqllogictest/test_files/math.slt 
b/datafusion/sqllogictest/test_files/math.slt
index 19de6560c2..ec1b0cfd93 100644
--- a/datafusion/sqllogictest/test_files/math.slt
+++ b/datafusion/sqllogictest/test_files/math.slt
@@ -565,6 +565,21 @@ SELECT c1%0 FROM test_non_nullable_decimal
 statement ok
 drop table test_non_nullable_decimal 
 
+statement ok
+CREATE TABLE signed_integers(
+  a INT,
+  b INT,
+  c INT,
+  d INT,
+  e INT,
+  f INT
+) as VALUES
+  (-1, 100, -567, 1024, -4, 10),
+  (2, -1000, 123, -256, 5, -11),
+  (-3, 10000, -978, 2048, -6, 12),
+  (4, NULL, NULL, -512, NULL, NULL)
+;
+
 query II
 select gcd(20, 1000), lcm(20, 1000);
 ----
@@ -587,12 +602,54 @@ select gcd(0, 0), gcd(-100, 0), gcd(0, -100);
 ----
 0 100 100
 
+
+## lcm
+
+# Basic cases
+query IIIII
+select lcm(0, 0), lcm(0, 2), lcm(3, 0), lcm(2, 3), lcm(15, 10);
+----
+0 0 0 6 30
+
+# Test lcm with negative numbers
+query IIIII
+select lcm(0, -2), lcm(-3, 0), lcm(-2, 3), lcm(15, -10), lcm(-15, -10)
+----
+0 0 6 30 30
+
+# Test lcm with Nulls
+query III
+select lcm(null, 64), lcm(16, null), lcm(null, null)
+----
+NULL NULL NULL
+
+# Test lcm with columns
+query III rowsort
+select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
+----
+100 580608 20
+1000 31488 55
+30000 1001472 12
+NULL NULL NULL
+
+# Result cannot fit in i64
 query error DataFusion error: Arrow error: Compute error: Signed integer 
overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
 select lcm(-9223372036854775808, -9223372036854775808);
 
+query error DataFusion error: Arrow error: Compute error: Signed integer 
overflow in LCM\(1, \-9223372036854775808\)
+select lcm(1, -9223372036854775808);
+
+# Overflow on multiplication
+query error DataFusion error: Arrow error: Compute error: Signed integer 
overflow in LCM\(2, 9223372036854775803\)
+select lcm(2, 9223372036854775803);
+
+
 query error DataFusion error: Arrow error: Compute error: Overflow happened 
on: 2107754225 \^ 1221660777
 select power(2107754225, 1221660777);
 
 # factorial overflow
 query error DataFusion error: Arrow error: Compute error: Overflow happened on 
FACTORIAL\(350943270\)
 select FACTORIAL(350943270);
+
+statement ok
+drop table signed_integers
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index 551c50e0a1..a68d1cc7a7 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -493,41 +493,6 @@ select gcd(a, b), gcd(c, d), gcd(e, f) from 
signed_integers;
 2 1 1
 NULL NULL NULL
 
-## lcm
-
-# lcm scalar function
-query III rowsort
-select lcm(0, 0), lcm(2, 3), lcm(15, 10);
-----
-0 6 30
-
-# lcm scalar nulls
-query I rowsort
-select lcm(null, 64);
-----
-NULL
-
-# lcm scalar nulls #1
-query I rowsort
-select lcm(2, null);
-----
-NULL
-
-# lcm scalar nulls #2
-query I rowsort
-select lcm(null, null);
-----
-NULL
-
-# lcm with columns
-query III rowsort
-select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
-----
-100 580608 20
-1000 31488 55
-30000 1001472 12
-NULL NULL NULL
-
 ## ln
 
 # ln scalar function


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: fix: LCM panicked due to overflow (#11131)

Reply via email to