alamb commented on code in PR #14834:
URL: https://github.com/apache/datafusion/pull/14834#discussion_r1969616455


##########
datafusion/functions/src/math/gcd.rs:
##########
@@ -75,36 +77,73 @@ impl ScalarUDFImpl for GcdFunc {
     }
 
     fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
-        Ok(Int64)
+        Ok(DataType::Int64)
     }
 
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        make_scalar_function(gcd, vec![])(&args.args)
+        let args: [ColumnarValue; 2] = args.args.try_into().map_err(|_| {
+            internal_datafusion_err!("Expected 2 arguments for function gcd")
+        })?;
+
+        match args {
+            [ColumnarValue::Array(a), ColumnarValue::Array(b)] => {
+                compute_gcd_for_arrays(&a, &b)
+            }
+            [ColumnarValue::Scalar(ScalarValue::Int64(a)), 
ColumnarValue::Scalar(ScalarValue::Int64(b))] => {
+                match (a, b) {
+                    (Some(a), Some(b)) => 
Ok(ColumnarValue::Scalar(ScalarValue::Int64(
+                        Some(compute_gcd(a, b)?),
+                    ))),
+                    _ => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
+                }
+            }
+            [ColumnarValue::Array(a), 
ColumnarValue::Scalar(ScalarValue::Int64(b))] => {
+                compute_gcd_with_scalar(&a, b)
+            }
+            [ColumnarValue::Scalar(ScalarValue::Int64(a)), 
ColumnarValue::Array(b)] => {
+                compute_gcd_with_scalar(&b, a)
+            }
+            _ => exec_err!("Unsupported argument types for function gcd"),
+        }
     }
 
     fn documentation(&self) -> Option<&Documentation> {
         self.doc()
     }
 }
 
-/// Gcd SQL function
-fn gcd(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args[0].data_type() {
-        Int64 => {
-            let arg1 = downcast_named_arg!(&args[0], "x", Int64Array);
-            let arg2 = downcast_named_arg!(&args[1], "y", Int64Array);
+fn compute_gcd_for_arrays(a: &ArrayRef, b: &ArrayRef) -> Result<ColumnarValue> 
{
+    let result: Result<Int64Array> = a
+        .as_primitive::<Int64Type>()
+        .iter()
+        .zip(b.as_primitive::<Int64Type>().iter())
+        .map(|(a, b)| match (a, b) {
+            (Some(a), Some(b)) => Ok(Some(compute_gcd(a, b)?)),
+            _ => Ok(None),
+        })
+        .collect();
+
+    result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
+}

Review Comment:
   With my proposal the two array version seems to go 25% faster than this PR. 
Here is a PR with that improvement:
   - https://github.com/jayzhan211/datafusion/pull/5
   
   ```
   Gnuplot not found, using plotters backend
   Benchmarking gcd both array: Warming up for 3.0000 s
   Warning: Unable to complete 100 samples in 5.0s. You may wish to increase 
target time to 5.9s, enable flat sampling, or reduce sample count to 60.
   gcd both array          time:   [1.1940 ms 1.2007 ms 1.2081 ms]
                           change: [-27.574% -27.080% -26.470%] (p = 0.00 < 
0.05)
                           Performance has improved.
   Found 9 outliers among 100 measurements (9.00%)
     6 (6.00%) high mild
     3 (3.00%) high severe
   
   gcd array and scalar    time:   [1.9644 ms 1.9705 ms 1.9781 ms]
                           change: [-1.3293% -0.9077% -0.4670%] (p = 0.00 < 
0.05)
                           Change within noise threshold.
   Found 6 outliers among 100 measurements (6.00%)
     3 (3.00%) high mild
     3 (3.00%) high severe
   
   gcd both scalar         time:   [26.094 ns 26.253 ns 26.467 ns]
                           change: [-1.1099% -0.6987% -0.2362%] (p = 0.00 < 
0.05)
                           Change within noise threshold.
   Found 8 outliers among 100 measurements (8.00%)
     4 (4.00%) high mild
     4 (4.00%) high severe
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to