This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 097f04c2ec fix(spark): handle divide-by-zero in Spark `mod`/`pmod` 
with ANSI mode support (#20461)
097f04c2ec is described below

commit 097f04c2ec728791683b00f017831368829dac86
Author: David López <[email protected]>
AuthorDate: Mon Mar 9 21:49:58 2026 +0100

    fix(spark): handle divide-by-zero in Spark `mod`/`pmod` with ANSI mode 
support (#20461)
    
    ## Which issue does this PR close?
    
    - N/A.
    
    ## Rationale for this change
    
    Spark's `mod` and `pmod` functions return `NULL` on integer division by
    zero in legacy mode (ANSI off), but DataFusion's implementation always
    threw a `DivideByZero` error regardless of the ANSI mode setting.
    
    ## What changes are included in this PR?
    
    - Add ANSI mode support to `spark_mod` and `spark_pmod` via the
      `enable_ansi_mode` config option.
    - In legacy mode (ANSI off): division by zero returns `NULL` per-element.
    - In ANSI mode (ANSI on): division by zero throws an error (unchanged
      behavior).
    - Add a `try_rem` helper that handles per-element zero-divisor masking for
      integer arrays.
    
    
    ## Are these changes tested?
    
    Yes:
    
    - 18 unit tests in modulus.rs (including new tests for both ANSI modes)
    - Updated pmod.slt and mod.slt sqllogictests with ANSI on/off coverage
    
    ## Are there any user-facing changes?
    
    Yes — `mod(10, 0)` and `pmod(10, 0)` now return `NULL` instead of erroring
    when `enable_ansi_mode = false` (the default), matching Spark's behavior.
---
 datafusion/spark/src/function/math/modulus.rs      | 144 ++++++++++++++++-----
 .../sqllogictest/test_files/spark/math/mod.slt     |  29 +++++
 .../sqllogictest/test_files/spark/math/pmod.slt    |  22 +++-
 3 files changed, 164 insertions(+), 31 deletions(-)

diff --git a/datafusion/spark/src/function/math/modulus.rs 
b/datafusion/spark/src/function/math/modulus.rs
index 49657e2cb8..7a21aabbdf 100644
--- a/datafusion/spark/src/function/math/modulus.rs
+++ b/datafusion/spark/src/function/math/modulus.rs
@@ -15,8 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use arrow::array::{Scalar, new_null_array};
 use arrow::compute::kernels::numeric::add;
-use arrow::compute::kernels::{cmp::lt, numeric::rem, zip::zip};
+use arrow::compute::kernels::{
+    cmp::{eq, lt},
+    numeric::rem,
+    zip::zip,
+};
 use arrow::datatypes::DataType;
 use datafusion_common::{Result, ScalarValue, assert_eq_or_internal_err};
 use datafusion_expr::{
@@ -24,28 +29,61 @@ use datafusion_expr::{
 };
 use std::any::Any;
 
+/// Attempts `rem(left, right)` with per-element divide-by-zero handling.
+/// In ANSI mode, any zero divisor causes an error.
+/// In legacy mode (ANSI off), positions where the divisor is zero return NULL
+/// while other positions compute normally.
+fn try_rem(
+    left: &arrow::array::ArrayRef,
+    right: &arrow::array::ArrayRef,
+    enable_ansi_mode: bool,
+) -> Result<arrow::array::ArrayRef> {
+    match rem(left, right) {
+        Ok(result) => Ok(result),
+        Err(arrow::error::ArrowError::DivideByZero) if !enable_ansi_mode => {
+            // Integer rem fails when ANY divisor element is zero.
+            // Handle per-element: null out zero divisors
+            let zero = ScalarValue::new_zero(right.data_type())?.to_array()?;
+            let zero = Scalar::new(zero);
+            let null = Scalar::new(new_null_array(right.data_type(), 1));
+            let is_zero = eq(right, &zero)?;
+            let safe_right = zip(&is_zero, &null, right)?;
+            Ok(rem(left, &safe_right)?)
+        }
+        Err(e) => Err(e.into()),
+    }
+}
+
 /// Spark-compatible `mod` function
-/// This function directly uses Arrow's arithmetic_op function for modulo 
operations
-pub fn spark_mod(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+/// In ANSI mode, division by zero throws an error.
+/// In legacy mode, division by zero returns NULL (Spark behavior).
+pub fn spark_mod(
+    args: &[ColumnarValue],
+    enable_ansi_mode: bool,
+) -> Result<ColumnarValue> {
     assert_eq_or_internal_err!(args.len(), 2, "mod expects exactly two 
arguments");
     let args = ColumnarValue::values_to_arrays(args)?;
-    let result = rem(&args[0], &args[1])?;
+    let result = try_rem(&args[0], &args[1], enable_ansi_mode)?;
     Ok(ColumnarValue::Array(result))
 }
 
 /// Spark-compatible `pmod` function
-/// This function directly uses Arrow's arithmetic_op function for modulo 
operations
-pub fn spark_pmod(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+/// In ANSI mode, division by zero throws an error.
+/// In legacy mode, division by zero returns NULL (Spark behavior).
+pub fn spark_pmod(
+    args: &[ColumnarValue],
+    enable_ansi_mode: bool,
+) -> Result<ColumnarValue> {
     assert_eq_or_internal_err!(args.len(), 2, "pmod expects exactly two 
arguments");
     let args = ColumnarValue::values_to_arrays(args)?;
     let left = &args[0];
     let right = &args[1];
     let zero = 
ScalarValue::new_zero(left.data_type())?.to_array_of_size(left.len())?;
-    let result = rem(left, right)?;
+    let result = try_rem(left, right, enable_ansi_mode)?;
     let neg = lt(&result, &zero)?;
     let plus = zip(&neg, right, &zero)?;
     let result = add(&plus, &result)?;
-    let result = rem(&result, right)?;
+    let result = try_rem(&result, right, enable_ansi_mode)?;
     Ok(ColumnarValue::Array(result))
 }
 
@@ -95,7 +133,7 @@ impl ScalarUDFImpl for SparkMod {
     }
 
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        spark_mod(&args.args)
+        spark_mod(&args.args, args.config_options.execution.enable_ansi_mode)
     }
 }
 
@@ -145,7 +183,7 @@ impl ScalarUDFImpl for SparkPmod {
     }
 
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        spark_pmod(&args.args)
+        spark_pmod(&args.args, args.config_options.execution.enable_ansi_mode)
     }
 }
 
@@ -165,7 +203,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_mod(&[left_value, right_value]).unwrap();
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
@@ -187,7 +225,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_mod(&[left_value, right_value]).unwrap();
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int64 =
@@ -228,7 +266,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_mod(&[left_value, right_value]).unwrap();
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_float64 = result_array
@@ -284,7 +322,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_mod(&[left_value, right_value]).unwrap();
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_float32 = result_array
@@ -319,7 +357,7 @@ mod test {
 
         let left_value = ColumnarValue::Array(Arc::new(left));
 
-        let result = spark_mod(&[left_value, right_value]).unwrap();
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
@@ -337,20 +375,43 @@ mod test {
         let left = Int32Array::from(vec![Some(10)]);
         let left_value = ColumnarValue::Array(Arc::new(left));
 
-        let result = spark_mod(&[left_value]);
+        let result = spark_mod(&[left_value], false);
         assert!(result.is_err());
     }
 
     #[test]
-    fn test_mod_zero_division() {
+    fn test_mod_zero_division_legacy() {
+        // In legacy mode (ANSI off), division by zero returns NULL per-element
+        let left = Int32Array::from(vec![Some(10), Some(7), Some(15)]);
+        let right = Int32Array::from(vec![Some(0), Some(2), Some(4)]);
+
+        let left_value = ColumnarValue::Array(Arc::new(left));
+        let right_value = ColumnarValue::Array(Arc::new(right));
+
+        let result = spark_mod(&[left_value, right_value], false).unwrap();
+
+        if let ColumnarValue::Array(result_array) = result {
+            let result_int32 =
+                result_array.as_any().downcast_ref::<Int32Array>().unwrap();
+            assert!(result_int32.is_null(0)); // 10 % 0 = NULL
+            assert_eq!(result_int32.value(1), 1); // 7 % 2 = 1
+            assert_eq!(result_int32.value(2), 3); // 15 % 4 = 3
+        } else {
+            panic!("Expected array result");
+        }
+    }
+
+    #[test]
+    fn test_mod_zero_division_ansi() {
+        // In ANSI mode, division by zero should error
         let left = Int32Array::from(vec![Some(10), Some(7), Some(15)]);
         let right = Int32Array::from(vec![Some(0), Some(2), Some(4)]);
 
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_mod(&[left_value, right_value]);
-        assert!(result.is_err()); // Division by zero should error
+        let result = spark_mod(&[left_value, right_value], true);
+        assert!(result.is_err());
     }
 
     // PMOD tests
@@ -362,7 +423,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
@@ -385,7 +446,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int64 =
@@ -425,7 +486,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_float64 = result_array
@@ -476,7 +537,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_float32 = result_array
@@ -508,7 +569,7 @@ mod test {
 
         let left_value = ColumnarValue::Array(Arc::new(left));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
@@ -527,20 +588,43 @@ mod test {
         let left = Int32Array::from(vec![Some(10)]);
         let left_value = ColumnarValue::Array(Arc::new(left));
 
-        let result = spark_pmod(&[left_value]);
+        let result = spark_pmod(&[left_value], false);
         assert!(result.is_err());
     }
 
     #[test]
-    fn test_pmod_zero_division() {
+    fn test_pmod_zero_division_legacy() {
+        // In legacy mode (ANSI off), division by zero returns NULL per-element
         let left = Int32Array::from(vec![Some(10), Some(-7), Some(15)]);
         let right = Int32Array::from(vec![Some(0), Some(0), Some(4)]);
 
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]);
-        assert!(result.is_err()); // Division by zero should error
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
+
+        if let ColumnarValue::Array(result_array) = result {
+            let result_int32 =
+                result_array.as_any().downcast_ref::<Int32Array>().unwrap();
+            assert!(result_int32.is_null(0)); // 10 pmod 0 = NULL
+            assert!(result_int32.is_null(1)); // -7 pmod 0 = NULL
+            assert_eq!(result_int32.value(2), 3); // 15 pmod 4 = 3
+        } else {
+            panic!("Expected array result");
+        }
+    }
+
+    #[test]
+    fn test_pmod_zero_division_ansi() {
+        // In ANSI mode, division by zero should error
+        let left = Int32Array::from(vec![Some(10), Some(-7), Some(15)]);
+        let right = Int32Array::from(vec![Some(0), Some(0), Some(4)]);
+
+        let left_value = ColumnarValue::Array(Arc::new(left));
+        let right_value = ColumnarValue::Array(Arc::new(right));
+
+        let result = spark_pmod(&[left_value, right_value], true);
+        assert!(result.is_err());
     }
 
     #[test]
@@ -552,7 +636,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
@@ -590,7 +674,7 @@ mod test {
         let left_value = ColumnarValue::Array(Arc::new(left));
         let right_value = ColumnarValue::Array(Arc::new(right));
 
-        let result = spark_pmod(&[left_value, right_value]).unwrap();
+        let result = spark_pmod(&[left_value, right_value], false).unwrap();
 
         if let ColumnarValue::Array(result_array) = result {
             let result_int32 =
diff --git a/datafusion/sqllogictest/test_files/spark/math/mod.slt 
b/datafusion/sqllogictest/test_files/spark/math/mod.slt
index 2780b3e105..68c0f59f48 100644
--- a/datafusion/sqllogictest/test_files/spark/math/mod.slt
+++ b/datafusion/sqllogictest/test_files/spark/math/mod.slt
@@ -144,6 +144,35 @@ SELECT MOD(10.0::decimal(3,1), 3.0::decimal(2,1)) as 
mod_decimal_2;
 ----
 1
 
+# Division by zero returns NULL in legacy mode (ANSI off)
+query I
+SELECT MOD(10::int, 0::int) as mod_div_zero_1;
+----
+NULL
+
+query I
+SELECT MOD(-7::int, 0::int) as mod_div_zero_2;
+----
+NULL
+
+query R
+SELECT MOD(10.5::float8, 0.0::float8) as mod_div_zero_float;
+----
+NaN
+
+# Division by zero errors in ANSI mode
+statement ok
+set datafusion.execution.enable_ansi_mode = true;
+
+statement error DataFusion error: Arrow error: Divide by zero error
+SELECT MOD(10::int, 0::int);
+
+statement error DataFusion error: Arrow error: Divide by zero error
+SELECT MOD(-7::int, 0::int);
+
+statement ok
+set datafusion.execution.enable_ansi_mode = false;
+
 # Edge cases
 query I
 SELECT MOD(0::int, 5::int) as mod_zero_1;
diff --git a/datafusion/sqllogictest/test_files/spark/math/pmod.slt 
b/datafusion/sqllogictest/test_files/spark/math/pmod.slt
index cf273c2d78..aa4a197ba4 100644
--- a/datafusion/sqllogictest/test_files/spark/math/pmod.slt
+++ b/datafusion/sqllogictest/test_files/spark/math/pmod.slt
@@ -64,8 +64,28 @@ SELECT pmod(0::int, 5::int) as pmod_zero_1;
 ----
 0
 
-statement error DataFusion error: Arrow error: Divide by zero error
+query I
 SELECT pmod(10::int, 0::int) as pmod_zero_2;
+----
+NULL
+
+query I
+SELECT pmod(-7::int, 0::int) as pmod_zero_3;
+----
+NULL
+
+# Division by zero errors in ANSI mode
+statement ok
+set datafusion.execution.enable_ansi_mode = true;
+
+statement error DataFusion error: Arrow error: Divide by zero error
+SELECT pmod(10::int, 0::int);
+
+statement error DataFusion error: Arrow error: Divide by zero error
+SELECT pmod(-7::int, 0::int);
+
+statement ok
+set datafusion.execution.enable_ansi_mode = false;
 
 # PMOD tests with NULL values
 query I


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to