This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2d1e8505ea fix: LCM panicked due to overflow (#11131)
2d1e8505ea is described below
commit 2d1e8505eacc77137425cc0fed2076bdb6af91a0
Author: Jonah Gao <[email protected]>
AuthorDate: Thu Jun 27 11:15:20 2024 +0800
fix: LCM panicked due to overflow (#11131)
* fix: LCM panicked due to overflow
* add test
* fix comment
---
datafusion/functions/src/math/gcd.rs | 2 +-
datafusion/functions/src/math/lcm.rs | 28 +++++++------
datafusion/sqllogictest/test_files/math.slt | 57 +++++++++++++++++++++++++++
datafusion/sqllogictest/test_files/scalar.slt | 35 ----------------
4 files changed, 74 insertions(+), 48 deletions(-)
diff --git a/datafusion/functions/src/math/gcd.rs
b/datafusion/functions/src/math/gcd.rs
index ace4937d6d..95a559c5d1 100644
--- a/datafusion/functions/src/math/gcd.rs
+++ b/datafusion/functions/src/math/gcd.rs
@@ -93,7 +93,7 @@ fn gcd(args: &[ArrayRef]) -> Result<ArrayRef> {
}
/// Computes gcd of two unsigned integers using Binary GCD algorithm.
-fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 {
+pub(super) fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 {
if a == 0 {
return b;
}
diff --git a/datafusion/functions/src/math/lcm.rs
b/datafusion/functions/src/math/lcm.rs
index d58bc5ef0b..21c201657e 100644
--- a/datafusion/functions/src/math/lcm.rs
+++ b/datafusion/functions/src/math/lcm.rs
@@ -27,7 +27,7 @@ use datafusion_common::{arrow_datafusion_err, exec_err,
DataFusionError, Result}
use datafusion_expr::ColumnarValue;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
-use crate::math::gcd::compute_gcd;
+use super::gcd::unsigned_gcd;
use crate::utils::make_scalar_function;
#[derive(Debug)]
@@ -75,19 +75,23 @@ impl ScalarUDFImpl for LcmFunc {
/// Lcm SQL function
fn lcm(args: &[ArrayRef]) -> Result<ArrayRef> {
let compute_lcm = |x: i64, y: i64| {
- let a = x.wrapping_abs();
- let b = y.wrapping_abs();
-
- if a == 0 || b == 0 {
+ if x == 0 || y == 0 {
return Ok(0);
}
- match compute_gcd(a, b) {
- Ok(gcd) => Ok(a / gcd * b),
- // change the error string to use LCM instead of GCD
- Err(_) =>
Err(arrow_datafusion_err!(ArrowError::ComputeError(format!(
- "Signed integer overflow in LCM({x}, {y})"
- )))),
- }
+
+ // lcm(x, y) = |x| * |y| / gcd(|x|, |y|)
+ let a = x.unsigned_abs();
+ let b = y.unsigned_abs();
+ let gcd = unsigned_gcd(a, b);
+ // gcd is not zero since both a and b are not zero, so the division is
safe.
+ (a / gcd)
+ .checked_mul(b)
+ .and_then(|v| i64::try_from(v).ok())
+ .ok_or_else(|| {
+ arrow_datafusion_err!(ArrowError::ComputeError(format!(
+ "Signed integer overflow in LCM({x}, {y})"
+ )))
+ })
};
match args[0].data_type() {
diff --git a/datafusion/sqllogictest/test_files/math.slt
b/datafusion/sqllogictest/test_files/math.slt
index 19de6560c2..ec1b0cfd93 100644
--- a/datafusion/sqllogictest/test_files/math.slt
+++ b/datafusion/sqllogictest/test_files/math.slt
@@ -565,6 +565,21 @@ SELECT c1%0 FROM test_non_nullable_decimal
statement ok
drop table test_non_nullable_decimal
+statement ok
+CREATE TABLE signed_integers(
+ a INT,
+ b INT,
+ c INT,
+ d INT,
+ e INT,
+ f INT
+) as VALUES
+ (-1, 100, -567, 1024, -4, 10),
+ (2, -1000, 123, -256, 5, -11),
+ (-3, 10000, -978, 2048, -6, 12),
+ (4, NULL, NULL, -512, NULL, NULL)
+;
+
query II
select gcd(20, 1000), lcm(20, 1000);
----
@@ -587,12 +602,54 @@ select gcd(0, 0), gcd(-100, 0), gcd(0, -100);
----
0 100 100
+
+## lcm
+
+# Basic cases
+query IIIII
+select lcm(0, 0), lcm(0, 2), lcm(3, 0), lcm(2, 3), lcm(15, 10);
+----
+0 0 0 6 30
+
+# Test lcm with negative numbers
+query IIIII
+select lcm(0, -2), lcm(-3, 0), lcm(-2, 3), lcm(15, -10), lcm(-15, -10)
+----
+0 0 6 30 30
+
+# Test lcm with Nulls
+query III
+select lcm(null, 64), lcm(16, null), lcm(null, null)
+----
+NULL NULL NULL
+
+# Test lcm with columns
+query III rowsort
+select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
+----
+100 580608 20
+1000 31488 55
+30000 1001472 12
+NULL NULL NULL
+
+# Result cannot fit in i64
query error DataFusion error: Arrow error: Compute error: Signed integer
overflow in LCM\(\-9223372036854775808, \-9223372036854775808\)
select lcm(-9223372036854775808, -9223372036854775808);
+query error DataFusion error: Arrow error: Compute error: Signed integer
overflow in LCM\(1, \-9223372036854775808\)
+select lcm(1, -9223372036854775808);
+
+# Overflow on multiplication
+query error DataFusion error: Arrow error: Compute error: Signed integer
overflow in LCM\(2, 9223372036854775803\)
+select lcm(2, 9223372036854775803);
+
+
query error DataFusion error: Arrow error: Compute error: Overflow happened
on: 2107754225 \^ 1221660777
select power(2107754225, 1221660777);
# factorial overflow
query error DataFusion error: Arrow error: Compute error: Overflow happened on
FACTORIAL\(350943270\)
select FACTORIAL(350943270);
+
+statement ok
+drop table signed_integers
\ No newline at end of file
diff --git a/datafusion/sqllogictest/test_files/scalar.slt
b/datafusion/sqllogictest/test_files/scalar.slt
index 551c50e0a1..a68d1cc7a7 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -493,41 +493,6 @@ select gcd(a, b), gcd(c, d), gcd(e, f) from
signed_integers;
2 1 1
NULL NULL NULL
-## lcm
-
-# lcm scalar function
-query III rowsort
-select lcm(0, 0), lcm(2, 3), lcm(15, 10);
-----
-0 6 30
-
-# lcm scalar nulls
-query I rowsort
-select lcm(null, 64);
-----
-NULL
-
-# lcm scalar nulls #1
-query I rowsort
-select lcm(2, null);
-----
-NULL
-
-# lcm scalar nulls #2
-query I rowsort
-select lcm(null, null);
-----
-NULL
-
-# lcm with columns
-query III rowsort
-select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers;
-----
-100 580608 20
-1000 31488 55
-30000 1001472 12
-NULL NULL NULL
-
## ln
# ln scalar function
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]