Re: [PR] feat: Support log for Decimal128 and Decimal256 [datafusion]

via GitHub Tue, 02 Sep 2025 01:39:15 -0700


Jefffrey commented on code in PR #17023:
URL: https://github.com/apache/datafusion/pull/17023#discussion_r2315261104



##########
datafusion/functions/src/math/log.rs:
##########
@@ -58,21 +64,91 @@ impl Default for LogFunc {
 
 impl LogFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Float32]),
-                    Exact(vec![Float64]),
-                    Exact(vec![Float32, Float32]),
-                    Exact(vec![Float64, Float64]),
+                    Numeric(1),
+                    Numeric(2),
+                    Exact(vec![DataType::Float32]),
+                    Exact(vec![DataType::Float64]),
+                    Exact(vec![DataType::Float32, DataType::Float32]),
+                    Exact(vec![DataType::Float64, DataType::Float64]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),

Review Comment:
   Are the `Exact(...)` signatures superseded by the `Numeric(1)` and 
`Numeric(2)`? Considering integers, floats and decimals are considered numeric 
types?



##########
datafusion/functions/src/math/log.rs:
##########
@@ -121,55 +198,68 @@ impl ScalarUDFImpl for LogFunc {
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
         let args = ColumnarValue::values_to_arrays(&args.args)?;
 
-        let mut base = ColumnarValue::Scalar(ScalarValue::Float32(Some(10.0)));
-
-        let mut x = &args[0];
-        if args.len() == 2 {
-            x = &args[1];
-            base = ColumnarValue::Array(Arc::clone(&args[0]));
-        }
-        // note in f64::log params order is different than in sql. e.g in sql 
log(base, x) == f64::log(x, base)
-        let arr: ArrayRef = match args[0].data_type() {
-            DataType::Float64 => match base {
-                ColumnarValue::Scalar(ScalarValue::Float32(Some(base))) => {
-                    Arc::new(x.as_primitive::<Float64Type>().unary::<_, 
Float64Type>(
-                        |value: f64| f64::log(value, base as f64),
-                    ))
-                }
-                ColumnarValue::Array(base) => {
-                    let x = x.as_primitive::<Float64Type>();
-                    let base = base.as_primitive::<Float64Type>();
-                    let result = arrow::compute::binary::<_, _, _, 
Float64Type>(
-                        x,
-                        base,
-                        f64::log,
-                    )?;
-                    Arc::new(result) as _
-                }
-                _ => {
-                    return exec_err!("log function requires a scalar or array 
for base")
-                }
-            },
-
-            DataType::Float32 => match base {
-                ColumnarValue::Scalar(ScalarValue::Float32(Some(base))) => 
Arc::new(
-                    x.as_primitive::<Float32Type>()
-                        .unary::<_, Float32Type>(|value: f32| f32::log(value, 
base)),
+        let (base, value) = if args.len() == 2 {
+            // note in f64::log params order is different than in sql. e.g in 
sql log(base, x) == f64::log(x, base)
+            (ColumnarValue::Array(Arc::clone(&args[0])), &args[1])
+        } else {
+            // log(num) - assume base is 10
+            let ret_type = if args[0].data_type().is_null() {
+                &DataType::Float64
+            } else {
+                args[0].data_type()
+            };

Review Comment:
   Should this logic match the above `return_type()` function?



##########
datafusion/functions/src/math/log.rs:
##########
@@ -58,21 +64,91 @@ impl Default for LogFunc {
 
 impl LogFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Float32]),
-                    Exact(vec![Float64]),
-                    Exact(vec![Float32, Float32]),
-                    Exact(vec![Float64, Float64]),
+                    Numeric(1),
+                    Numeric(2),
+                    Exact(vec![DataType::Float32]),
+                    Exact(vec![DataType::Float64]),
+                    Exact(vec![DataType::Float32, DataType::Float32]),
+                    Exact(vec![DataType::Float64, DataType::Float64]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
                 ],
                 Volatility::Immutable,
             ),
         }
     }
 }
 
+/// Binary function to calculate an integer logarithm of Decimal128 `value` 
using `base` base
+/// Returns error if base is invalid
+fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64, 
ArrowError> {
+    if !base.is_finite() || base.trunc() != base || (base as u32) < 2 {
+        Err(ArrowError::ComputeError(format!(
+            "Log cannot use non-integer or small base {base}"
+        )))
+    } else {
+        let unscaled_value = decimal128_to_i128(value, scale)?;
+        if unscaled_value > 0 {
+            let log_value: u32 = unscaled_value.ilog(base as i128);
+            Ok(log_value as f64)
+        } else {
+            // Reflect f64::log behaviour
+            Ok(f64::NAN)
+        }
+    }
+}
+
+/// Binary function to calculate an integer logarithm of Decimal128 `value` 
using `base` base
+/// Returns error if base is invalid or if value is out of bounds of Decimal128
+fn log_decimal256(value: i256, scale: i8, base: f64) -> Result<f64, 
ArrowError> {
+    if !base.is_finite() || base.trunc() != base || (base as u32) < 2 {
+        Err(ArrowError::ComputeError(format!(
+            "Log cannot use non-integer or small base {base}"
+        )))
+    } else {
+        match value.to_i128() {
+            Some(short_value) => {
+                // Calculate logarithm only for 128-bit decimals
+                let unscaled_value = decimal128_to_i128(short_value, scale)?;
+                if unscaled_value > 0 {
+                    let log_value: u32 = unscaled_value.ilog(base as i128);
+                    Ok(log_value as f64)
+                } else {
+                    Ok(f64::NAN)
+                }
+            }
+            None => Err(ArrowError::ComputeError(format!(
+                "Log of a large Decimal256 is not supported: {value}"
+            ))),
+        }
+    }

Review Comment:
   ```suggestion
       match value.to_i128() {
           Some(value) => log_decimal128(value, scale, base),
           None => Err(ArrowError::NotYetImplemented(format!(
               "Log of Decimal256 larger than Decimal128 is not yet supported: 
{value}"
           ))),
       }
   ```
   
   Thoughts on reusing existing function above? Also changing error type as 
`NotYetImplemented` seems more appropriate (though maybe should use the 
DataFusion version instead of Arrow version 🤔 )



##########
datafusion/functions/src/math/log.rs:
##########
@@ -58,21 +64,91 @@ impl Default for LogFunc {
 
 impl LogFunc {
     pub fn new() -> Self {
-        use DataType::*;
         Self {
             signature: Signature::one_of(
                 vec![
-                    Exact(vec![Float32]),
-                    Exact(vec![Float64]),
-                    Exact(vec![Float32, Float32]),
-                    Exact(vec![Float64, Float64]),
+                    Numeric(1),
+                    Numeric(2),
+                    Exact(vec![DataType::Float32]),
+                    Exact(vec![DataType::Float64]),
+                    Exact(vec![DataType::Float32, DataType::Float32]),
+                    Exact(vec![DataType::Float64, DataType::Float64]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Int64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float32,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
+                    Exact(vec![
+                        DataType::Float64,
+                        DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0),
+                    ]),
                 ],
                 Volatility::Immutable,
             ),
         }
     }
 }
 
+/// Binary function to calculate an integer logarithm of Decimal128 `value` 
using `base` base
+/// Returns error if base is invalid
+fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64, 
ArrowError> {
+    if !base.is_finite() || base.trunc() != base || (base as u32) < 2 {
+        Err(ArrowError::ComputeError(format!(
+            "Log cannot use non-integer or small base {base}"
+        )))
+    } else {
+        let unscaled_value = decimal128_to_i128(value, scale)?;
+        if unscaled_value > 0 {
+            let log_value: u32 = unscaled_value.ilog(base as i128);
+            Ok(log_value as f64)
+        } else {
+            // Reflect f64::log behaviour
+            Ok(f64::NAN)
+        }
+    }

Review Comment:
   ```suggestion
       if !base.is_finite() || base.trunc() != base {
           return Err(ArrowError::ComputeError(format!(
               "Log cannot use non-integer base: {base}"
           )));
       }
       if (base as u32) < 2 {
           return Err(ArrowError::ComputeError(format!(
               "Log base must be greater than 1: {base}"
           )));
       }
   
       let unscaled_value = decimal128_to_i128(value, scale)?;
       if unscaled_value > 0 {
           let log_value: u32 = unscaled_value.ilog(base as i128);
           Ok(log_value as f64)
       } else {
           // Reflect f64::log behaviour
           Ok(f64::NAN)
       }
   ```
   
   Just to clarify the errors a bit



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat: Support log for Decimal128 and Decimal256 [datafusion]

Reply via email to