(datafusion) branch main updated: Reduce code repetition in `datafusion/functions` mod files (#10700)

alamb Mon, 03 Jun 2024 11:43:18 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 3aae451d38 Reduce code repetition in `datafusion/functions` mod files 
(#10700)
3aae451d38 is described below

commit 3aae451d38476510670fff04404418955a4fc83c
Author: Mohamed Abdeen <[email protected]>
AuthorDate: Mon Jun 3 21:43:07 2024 +0300

    Reduce code repetition in `datafusion/functions` mod files (#10700)
    
    * initial reduce repetition using macros
    
    * formatting and docs
    
    * fix docs
    
    * refix doc
    
    * replace math mod too
    
    * fix vec arguments
    
    * fix math variadic args
    
    * apply to functions
    
    * pattern-match hack to avoid second macro
    
    * missed a function
    
    * fix merge conflict
    
    * fix octet_length argument
---
 datafusion/functions/src/core/mod.rs     |  82 +++++------
 datafusion/functions/src/crypto/mod.rs   |  60 ++++----
 datafusion/functions/src/datetime/mod.rs | 123 +++++++---------
 datafusion/functions/src/encoding/mod.rs |  22 ++-
 datafusion/functions/src/macros.rs       |  38 ++---
 datafusion/functions/src/math/mod.rs     | 238 ++++++-------------------------
 datafusion/functions/src/string/mod.rs   | 181 +++++++++++------------
 datafusion/functions/src/unicode/mod.rs  | 111 +++++++-------
 8 files changed, 339 insertions(+), 516 deletions(-)

diff --git a/datafusion/functions/src/core/mod.rs 
b/datafusion/functions/src/core/mod.rs
index 349d483a41..a2742220f3 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -42,59 +42,49 @@ make_udf_function!(named_struct::NamedStructFunc, 
NAMED_STRUCT, named_struct);
 make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field);
 make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce);
 
-// Export the functions out of this package, both as expr_fn as well as a list 
of functions
 pub mod expr_fn {
     use datafusion_expr::{Expr, Literal};
 
-    /// returns NULL if value1 equals value2; otherwise it returns value1. This
-    /// can be used to perform the inverse operation of the COALESCE expression
-    pub fn nullif(arg1: Expr, arg2: Expr) -> Expr {
-        super::nullif().call(vec![arg1, arg2])
-    }
-
-    /// returns value1 cast to the `arrow_type` given the second argument. This
-    /// can be used to cast to a specific `arrow_type`.
-    pub fn arrow_cast(arg1: Expr, arg2: Expr) -> Expr {
-        super::arrow_cast().call(vec![arg1, arg2])
-    }
-
-    /// Returns value2 if value1 is NULL; otherwise it returns value1
-    pub fn nvl(arg1: Expr, arg2: Expr) -> Expr {
-        super::nvl().call(vec![arg1, arg2])
-    }
-
-    /// Returns value2 if value1 is not NULL; otherwise, it returns value3.
-    pub fn nvl2(arg1: Expr, arg2: Expr, arg3: Expr) -> Expr {
-        super::nvl2().call(vec![arg1, arg2, arg3])
-    }
-
-    /// Returns the Arrow type of the input expression.
-    pub fn arrow_typeof(arg1: Expr) -> Expr {
-        super::arrow_typeof().call(vec![arg1])
-    }
-
-    /// Returns a struct with the given arguments
-    pub fn r#struct(args: Vec<Expr>) -> Expr {
-        super::r#struct().call(args)
-    }
-
-    /// Returns a struct with the given names and arguments pairs
-    pub fn named_struct(args: Vec<Expr>) -> Expr {
-        super::named_struct().call(args)
-    }
-
-    /// Returns the value of the field with the given name from the struct
-    pub fn get_field(arg1: Expr, field_name: impl Literal) -> Expr {
-        super::get_field().call(vec![arg1, field_name.lit()])
-    }
+    export_functions!((
+        nullif,
+        "Returns NULL if value1 equals value2; otherwise it returns value1. 
This can be used to perform the inverse operation of the COALESCE expression",
+        arg1 arg2
+    ),(
+        arrow_cast,
+        "Returns value1 cast to the `arrow_type` given the second argument. This can be used to cast to a specific `arrow_type`.",
+        arg1 arg2
+    ),(
+        nvl,
+        "Returns value2 if value1 is NULL; otherwise it returns value1",
+        arg1 arg2
+    ),(
+        nvl2,
+        "Returns value2 if value1 is not NULL; otherwise, it returns value3.",
+        arg1 arg2 arg3
+    ),(
+        arrow_typeof,
+        "Returns the Arrow type of the input expression.",
+        arg1
+    ),(
+        r#struct,
+        "Returns a struct with the given arguments",
+        args,
+    ),(
+        named_struct,
+        "Returns a struct with the given names and arguments pairs",
+        args,
+    ),(
+        coalesce,
+        "Returns `coalesce(args...)`, which evaluates to the value of the 
first expr which is not NULL",
+        args,
+    ));
 
-    /// Returns `coalesce(args...)`, which evaluates to the value of the first 
expr which is not NULL
-    pub fn coalesce(args: Vec<Expr>) -> Expr {
-        super::coalesce().call(args)
+    #[doc = "Returns the value of the field with the given name from the 
struct"]
+    pub fn get_field(arg1: Expr, arg2: impl Literal) -> Expr {
+        super::get_field().call(vec![arg1, arg2.lit()])
     }
 }
 
-///   Return a list of all functions in this package
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
     vec![
         nullif(),
diff --git a/datafusion/functions/src/crypto/mod.rs 
b/datafusion/functions/src/crypto/mod.rs
index a879fdb45b..497c1af62a 100644
--- a/datafusion/functions/src/crypto/mod.rs
+++ b/datafusion/functions/src/crypto/mod.rs
@@ -17,6 +17,9 @@
 
 //! "crypto" DataFusion functions
 
+use datafusion_expr::ScalarUDF;
+use std::sync::Arc;
+
 pub mod basic;
 pub mod digest;
 pub mod md5;
@@ -30,28 +33,35 @@ make_udf_function!(sha224::SHA224Func, SHA224, sha224);
 make_udf_function!(sha256::SHA256Func, SHA256, sha256);
 make_udf_function!(sha384::SHA384Func, SHA384, sha384);
 make_udf_function!(sha512::SHA512Func, SHA512, sha512);
-export_functions!((
-    digest,
-    input_arg1 input_arg2,
-    "Computes the binary hash of an expression using the specified algorithm."
-),(
-    md5,
-    input_arg,
-    "Computes an MD5 128-bit checksum for a string expression."
-),(
-    sha224,
-    input_arg1,
-    "Computes the SHA-224 hash of a binary string."
-),(
-    sha256,
-    input_arg1,
-    "Computes the SHA-256 hash of a binary string."
-),(
-    sha384,
-    input_arg1,
-    "Computes the SHA-384 hash of a binary string."
-),(
-    sha512,
-    input_arg1,
-    "Computes the SHA-512 hash of a binary string."
-));
+
+pub mod expr_fn {
+    export_functions!((
+        digest,
+        "Computes the binary hash of an expression using the specified 
algorithm.",
+        input_arg1 input_arg2
+    ),(
+        md5,
+        "Computes an MD5 128-bit checksum for a string expression.",
+        input_arg
+    ),(
+        sha224,
+        "Computes the SHA-224 hash of a binary string.",
+        input_arg1
+    ),(
+        sha256,
+        "Computes the SHA-256 hash of a binary string.",
+        input_arg1
+    ),(
+        sha384,
+        "Computes the SHA-384 hash of a binary string.",
+        input_arg1
+    ),(
+        sha512,
+        "Computes the SHA-512 hash of a binary string.",
+        input_arg1
+    ));
+}
+
+pub fn functions() -> Vec<Arc<ScalarUDF>> {
+    vec![digest(), md5(), sha224(), sha256(), sha384(), sha512()]
+}
diff --git a/datafusion/functions/src/datetime/mod.rs 
b/datafusion/functions/src/datetime/mod.rs
index c6939976eb..9c2f80856b 100644
--- a/datafusion/functions/src/datetime/mod.rs
+++ b/datafusion/functions/src/datetime/mod.rs
@@ -79,45 +79,60 @@ make_udf_function!(
 pub mod expr_fn {
     use datafusion_expr::Expr;
 
-    #[doc = "returns current UTC date as a Date32 value"]
-    pub fn current_date() -> Expr {
-        super::current_date().call(vec![])
-    }
-
-    #[doc = "returns current UTC time as a Time64 value"]
-    pub fn current_time() -> Expr {
-        super::current_time().call(vec![])
-    }
-
-    #[doc = "coerces an arbitrary timestamp to the start of the nearest 
specified interval"]
-    pub fn date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr {
-        super::date_bin().call(vec![stride, source, origin])
-    }
-
-    #[doc = "extracts a subfield from the date"]
-    pub fn date_part(part: Expr, date: Expr) -> Expr {
-        super::date_part().call(vec![part, date])
-    }
-
-    #[doc = "truncates the date to a specified level of precision"]
-    pub fn date_trunc(part: Expr, date: Expr) -> Expr {
-        super::date_trunc().call(vec![part, date])
-    }
-
-    #[doc = "converts an integer to RFC3339 timestamp format string"]
-    pub fn from_unixtime(unixtime: Expr) -> Expr {
-        super::from_unixtime().call(vec![unixtime])
-    }
-
-    #[doc = "make a date from year, month and day component parts"]
-    pub fn make_date(year: Expr, month: Expr, day: Expr) -> Expr {
-        super::make_date().call(vec![year, month, day])
-    }
-
-    #[doc = "returns the current timestamp in nanoseconds, using the same 
value for all instances of now() in same statement"]
-    pub fn now() -> Expr {
-        super::now().call(vec![])
-    }
+    export_functions!((
+        current_date,
+        "returns current UTC date as a Date32 value",
+    ),(
+        current_time,
+        "returns current UTC time as a Time64 value",
+    ),(
+        from_unixtime,
+        "converts an integer to RFC3339 timestamp format string",
+        unixtime
+    ),(
+        date_bin,
+        "coerces an arbitrary timestamp to the start of the nearest specified 
interval",
+        stride source origin
+    ),(
+        date_part,
+        "extracts a subfield from the date",
+        part date
+    ),(
+        date_trunc,
+        "truncates the date to a specified level of precision",
+        part date
+    ),(
+        make_date,
+        "make a date from year, month and day component parts",
+        year month day
+    ),(
+        now,
+        "returns the current timestamp in nanoseconds, using the same value 
for all instances of now() in same statement",
+    ),(
+        to_unixtime,
+        "converts a string and optional formats to a Unixtime",
+        args,
+    ),(
+        to_timestamp,
+        "converts a string and optional formats to a `Timestamp(Nanoseconds, 
None)`",
+        args,
+    ),(
+        to_timestamp_seconds,
+        "converts a string and optional formats to a `Timestamp(Seconds, 
None)`",
+        args,
+    ),(
+        to_timestamp_millis,
+        "converts a string and optional formats to a `Timestamp(Milliseconds, 
None)`",
+        args,
+    ),(
+        to_timestamp_micros,
+        "converts a string and optional formats to a `Timestamp(Microseconds, 
None)`",
+        args,
+    ),(
+        to_timestamp_nanos,
+        "converts a string and optional formats to a `Timestamp(Nanoseconds, 
None)`",
+        args,
+    ));
 
     /// Returns a string representation of a date, time, timestamp or duration 
based
     /// on a Chrono pattern.
@@ -247,36 +262,6 @@ pub mod expr_fn {
     pub fn to_date(args: Vec<Expr>) -> Expr {
         super::to_date().call(args)
     }
-
-    #[doc = "converts a string and optional formats to a Unixtime"]
-    pub fn to_unixtime(args: Vec<Expr>) -> Expr {
-        super::to_unixtime().call(args)
-    }
-
-    #[doc = "converts a string and optional formats to a 
`Timestamp(Nanoseconds, None)`"]
-    pub fn to_timestamp(args: Vec<Expr>) -> Expr {
-        super::to_timestamp().call(args)
-    }
-
-    #[doc = "converts a string and optional formats to a `Timestamp(Seconds, 
None)`"]
-    pub fn to_timestamp_seconds(args: Vec<Expr>) -> Expr {
-        super::to_timestamp_seconds().call(args)
-    }
-
-    #[doc = "converts a string and optional formats to a 
`Timestamp(Milliseconds, None)`"]
-    pub fn to_timestamp_millis(args: Vec<Expr>) -> Expr {
-        super::to_timestamp_millis().call(args)
-    }
-
-    #[doc = "converts a string and optional formats to a 
`Timestamp(Microseconds, None)`"]
-    pub fn to_timestamp_micros(args: Vec<Expr>) -> Expr {
-        super::to_timestamp_micros().call(args)
-    }
-
-    #[doc = "converts a string and optional formats to a 
`Timestamp(Nanoseconds, None)`"]
-    pub fn to_timestamp_nanos(args: Vec<Expr>) -> Expr {
-        super::to_timestamp_nanos().call(args)
-    }
 }
 
 ///   Return a list of all functions in this package
diff --git a/datafusion/functions/src/encoding/mod.rs 
b/datafusion/functions/src/encoding/mod.rs
index 49f914a687..24e11e5d63 100644
--- a/datafusion/functions/src/encoding/mod.rs
+++ b/datafusion/functions/src/encoding/mod.rs
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use datafusion_expr::ScalarUDF;
+use std::sync::Arc;
+
 pub mod inner;
 
 // create `encode` and `decode` UDFs
@@ -22,7 +25,18 @@ make_udf_function!(inner::EncodeFunc, ENCODE, encode);
 make_udf_function!(inner::DecodeFunc, DECODE, decode);
 
 // Export the functions out of this package, both as expr_fn as well as a list 
of functions
-export_functions!(
-    (encode, input encoding, "encode the `input`, using the `encoding`. 
encoding can be base64 or hex"),
-    (decode, input encoding, "decode the `input`, using the `encoding`. 
encoding can be base64 or hex")
-);
+pub mod expr_fn {
+    export_functions!( (
+        encode,
+        "encode the `input`, using the `encoding`. encoding can be base64 or 
hex",
+        input encoding
+    ),(
+        decode,
+        "decode the `input`, using the `encoding`. encoding can be base64 or 
hex",
+        input encoding
+    ));
+}
+
+pub fn functions() -> Vec<Arc<ScalarUDF>> {
+    vec![encode(), decode()]
+}
diff --git a/datafusion/functions/src/macros.rs 
b/datafusion/functions/src/macros.rs
index dcc37f100c..cae689b3e0 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -36,25 +36,31 @@
 ///    ]
 /// }
 /// ```
+///
+/// Exported functions accept:
+/// - `Vec<Expr>` argument (single argument followed by a comma)
+/// - Variable number of `Expr` arguments (zero or more arguments, must be 
without commas)
 macro_rules! export_functions {
-    ($(($FUNC:ident,  $($arg:ident)*, $DOC:expr)),*) => {
-        pub mod expr_fn {
-            $(
-                #[doc = $DOC]
-                /// Return $name(arg)
-                pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> 
datafusion_expr::Expr {
-                    super::$FUNC().call(vec![$($arg),*],)
-                }
-            )*
+    ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => {
+        $(
+            // switch to single-function cases below
+            export_functions!(single $FUNC, $DOC, $($arg)*);
+        )*
+    };
+
+    // single vector argument (a single argument followed by a comma)
+    (single $FUNC:ident, $DOC:expr, $arg:ident,) => {
+        #[doc = $DOC]
+        pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> 
datafusion_expr::Expr {
+            super::$FUNC().call($arg)
         }
+    };
 
-        /// Return a list of all functions in this package
-        pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
-            vec![
-                $(
-                    $FUNC(),
-                )*
-            ]
+    // variadic arguments (zero or more arguments, without commas)
+    (single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
+        #[doc = $DOC]
+        pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> 
datafusion_expr::Expr {
+            super::$FUNC().call(vec![$($arg),*])
         }
     };
 }
diff --git a/datafusion/functions/src/math/mod.rs 
b/datafusion/functions/src/math/mod.rs
index 387237acb7..9ee173bb61 100644
--- a/datafusion/functions/src/math/mod.rs
+++ b/datafusion/functions/src/math/mod.rs
@@ -17,11 +17,9 @@
 
 //! "math" DataFusion functions
 
-use std::sync::Arc;
-
 use crate::math::monotonicity::*;
-
 use datafusion_expr::ScalarUDF;
+use std::sync::Arc;
 
 pub mod abs;
 pub mod cot;
@@ -92,200 +90,48 @@ make_math_unary_udf!(TanhFunc, TANH, tanh, tanh, 
super::tanh_order);
 make_udf_function!(trunc::TruncFunc, TRUNC, trunc);
 
 pub mod expr_fn {
-    use datafusion_expr::Expr;
-
-    #[doc = "returns the absolute value of a given number"]
-    pub fn abs(num: Expr) -> Expr {
-        super::abs().call(vec![num])
-    }
-
-    #[doc = "returns the arc cosine or inverse cosine of a number"]
-    pub fn acos(num: Expr) -> Expr {
-        super::acos().call(vec![num])
-    }
-
-    #[doc = "returns inverse hyperbolic cosine"]
-    pub fn acosh(num: Expr) -> Expr {
-        super::acosh().call(vec![num])
-    }
-
-    #[doc = "returns the arc sine or inverse sine of a number"]
-    pub fn asin(num: Expr) -> Expr {
-        super::asin().call(vec![num])
-    }
-
-    #[doc = "returns inverse hyperbolic sine"]
-    pub fn asinh(num: Expr) -> Expr {
-        super::asinh().call(vec![num])
-    }
-
-    #[doc = "returns inverse tangent"]
-    pub fn atan(num: Expr) -> Expr {
-        super::atan().call(vec![num])
-    }
-
-    #[doc = "returns inverse tangent of a division given in the argument"]
-    pub fn atan2(y: Expr, x: Expr) -> Expr {
-        super::atan2().call(vec![y, x])
-    }
-
-    #[doc = "returns inverse hyperbolic tangent"]
-    pub fn atanh(num: Expr) -> Expr {
-        super::atanh().call(vec![num])
-    }
-
-    #[doc = "cube root of a number"]
-    pub fn cbrt(num: Expr) -> Expr {
-        super::cbrt().call(vec![num])
-    }
-
-    #[doc = "nearest integer greater than or equal to argument"]
-    pub fn ceil(num: Expr) -> Expr {
-        super::ceil().call(vec![num])
-    }
-
-    #[doc = "cosine"]
-    pub fn cos(num: Expr) -> Expr {
-        super::cos().call(vec![num])
-    }
-
-    #[doc = "hyperbolic cosine"]
-    pub fn cosh(num: Expr) -> Expr {
-        super::cosh().call(vec![num])
-    }
-
-    #[doc = "cotangent of a number"]
-    pub fn cot(num: Expr) -> Expr {
-        super::cot().call(vec![num])
-    }
-
-    #[doc = "converts radians to degrees"]
-    pub fn degrees(num: Expr) -> Expr {
-        super::degrees().call(vec![num])
-    }
-
-    #[doc = "exponential"]
-    pub fn exp(num: Expr) -> Expr {
-        super::exp().call(vec![num])
-    }
-
-    #[doc = "factorial"]
-    pub fn factorial(num: Expr) -> Expr {
-        super::factorial().call(vec![num])
-    }
-
-    #[doc = "nearest integer less than or equal to argument"]
-    pub fn floor(num: Expr) -> Expr {
-        super::floor().call(vec![num])
-    }
-
-    #[doc = "greatest common divisor"]
-    pub fn gcd(x: Expr, y: Expr) -> Expr {
-        super::gcd().call(vec![x, y])
-    }
-
-    #[doc = "returns true if a given number is +NaN or -NaN otherwise returns 
false"]
-    pub fn isnan(num: Expr) -> Expr {
-        super::isnan().call(vec![num])
-    }
-
-    #[doc = "returns true if a given number is +0.0 or -0.0 otherwise returns 
false"]
-    pub fn iszero(num: Expr) -> Expr {
-        super::iszero().call(vec![num])
-    }
-
-    #[doc = "least common multiple"]
-    pub fn lcm(x: Expr, y: Expr) -> Expr {
-        super::lcm().call(vec![x, y])
-    }
-
-    #[doc = "natural logarithm (base e) of a number"]
-    pub fn ln(num: Expr) -> Expr {
-        super::ln().call(vec![num])
-    }
-
-    #[doc = "logarithm of a number for a particular `base`"]
-    pub fn log(base: Expr, num: Expr) -> Expr {
-        super::log().call(vec![base, num])
-    }
-
-    #[doc = "base 2 logarithm of a number"]
-    pub fn log2(num: Expr) -> Expr {
-        super::log2().call(vec![num])
-    }
-
-    #[doc = "base 10 logarithm of a number"]
-    pub fn log10(num: Expr) -> Expr {
-        super::log10().call(vec![num])
-    }
-
-    #[doc = "returns x if x is not NaN otherwise returns y"]
-    pub fn nanvl(x: Expr, y: Expr) -> Expr {
-        super::nanvl().call(vec![x, y])
-    }
-
-    #[doc = "Returns an approximate value of π"]
-    pub fn pi() -> Expr {
-        super::pi().call(vec![])
-    }
-
-    #[doc = "`base` raised to the power of `exponent`"]
-    pub fn power(base: Expr, exponent: Expr) -> Expr {
-        super::power().call(vec![base, exponent])
-    }
-
-    #[doc = "converts degrees to radians"]
-    pub fn radians(num: Expr) -> Expr {
-        super::radians().call(vec![num])
-    }
-
-    #[doc = "Returns a random value in the range 0.0 <= x < 1.0"]
-    pub fn random() -> Expr {
-        super::random().call(vec![])
-    }
-
-    #[doc = "round to nearest integer"]
-    pub fn round(args: Vec<Expr>) -> Expr {
-        super::round().call(args)
-    }
-
-    #[doc = "sign of the argument (-1, 0, +1)"]
-    pub fn signum(num: Expr) -> Expr {
-        super::signum().call(vec![num])
-    }
-
-    #[doc = "sine"]
-    pub fn sin(num: Expr) -> Expr {
-        super::sin().call(vec![num])
-    }
-
-    #[doc = "hyperbolic sine"]
-    pub fn sinh(num: Expr) -> Expr {
-        super::sinh().call(vec![num])
-    }
-
-    #[doc = "square root of a number"]
-    pub fn sqrt(num: Expr) -> Expr {
-        super::sqrt().call(vec![num])
-    }
-
-    #[doc = "returns the tangent of a number"]
-    pub fn tan(num: Expr) -> Expr {
-        super::tan().call(vec![num])
-    }
-
-    #[doc = "returns the hyperbolic tangent of a number"]
-    pub fn tanh(num: Expr) -> Expr {
-        super::tanh().call(vec![num])
-    }
-
-    #[doc = "truncate toward zero, with optional precision"]
-    pub fn trunc(args: Vec<Expr>) -> Expr {
-        super::trunc().call(args)
-    }
+    export_functions!(
+        (abs, "returns the absolute value of a given number", num),
+        (acos, "returns the arc cosine or inverse cosine of a number", num),
+        (acosh, "returns inverse hyperbolic cosine", num),
+        (asin, "returns the arc sine or inverse sine of a number", num),
+        (asinh, "returns inverse hyperbolic sine", num),
+        (atan, "returns inverse tangent", num),
+        (atan2, "returns inverse tangent of a division given in the argument", 
y x),
+        (atanh, "returns inverse hyperbolic tangent", num),
+        (cbrt, "cube root of a number", num),
+        (ceil, "nearest integer greater than or equal to argument", num),
+        (cos, "cosine", num),
+        (cosh, "hyperbolic cosine", num),
+        (cot, "cotangent of a number", num),
+        (degrees, "converts radians to degrees", num),
+        (exp, "exponential", num),
+        (factorial, "factorial", num),
+        (floor, "nearest integer less than or equal to argument", num),
+        (gcd, "greatest common divisor", x y),
+        (isnan, "returns true if a given number is +NaN or -NaN otherwise 
returns false", num),
+        (iszero, "returns true if a given number is +0.0 or -0.0 otherwise 
returns false", num),
+        (lcm, "least common multiple", x y),
+        (ln, "natural logarithm (base e) of a number", num),
+        (log, "logarithm of a number for a particular `base`", base num),
+        (log2, "base 2 logarithm of a number", num),
+        (log10, "base 10 logarithm of a number", num),
+        (nanvl, "returns x if x is not NaN otherwise returns y", x y),
+        (pi, "Returns an approximate value of π",),
+        (power, "`base` raised to the power of `exponent`", base exponent),
+        (radians, "converts degrees to radians", num),
+        (random, "Returns a random value in the range 0.0 <= x < 1.0",),
+        (signum, "sign of the argument (-1, 0, +1)", num),
+        (sin, "sine", num),
+        (sinh, "hyperbolic sine", num),
+        (sqrt, "square root of a number", num),
+        (tan, "returns the tangent of a number", num),
+        (tanh, "returns the hyperbolic tangent of a number", num),
+        (round, "round to nearest integer", args,),
+        (trunc, "truncate toward zero, with optional precision", args,)
+    );
 }
 
-///   Return a list of all functions in this package
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
     vec![
         abs(),
@@ -318,13 +164,13 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         power(),
         radians(),
         random(),
-        round(),
         signum(),
         sin(),
         sinh(),
         sqrt(),
         tan(),
         tanh(),
+        round(),
         trunc(),
     ]
 }
diff --git a/datafusion/functions/src/string/mod.rs 
b/datafusion/functions/src/string/mod.rs
index e931c49981..219ef8b5a5 100644
--- a/datafusion/functions/src/string/mod.rs
+++ b/datafusion/functions/src/string/mod.rs
@@ -70,117 +70,98 @@ make_udf_function!(uuid::UuidFunc, UUID, uuid);
 pub mod expr_fn {
     use datafusion_expr::Expr;
 
-    #[doc = "Returns the numeric code of the first character of the argument."]
-    pub fn ascii(arg1: Expr) -> Expr {
-        super::ascii().call(vec![arg1])
-    }
-
-    #[doc = "Returns the number of bits in the `string`"]
-    pub fn bit_length(arg: Expr) -> Expr {
-        super::bit_length().call(vec![arg])
-    }
+    export_functions!((
+        ascii,
+        "Returns the numeric code of the first character of the argument.",
+        arg1
+    ),(
+        bit_length,
+        "Returns the number of bits in the `string`",
+        arg1
+    ),(
+        btrim,
+        "Removes all characters, spaces by default, from both sides of a 
string",
+        args,
+    ),(
+        chr,
+        "Converts the Unicode code point to a UTF8 character",
+        arg1
+    ),(
+        concat,
+        "Concatenates the text representations of all the arguments. NULL 
arguments are ignored",
+        args,
+    ),(
+        ends_with,
+        "Returns true if the `string` ends with the `suffix`, false 
otherwise.",
+        string suffix
+    ),(
+        initcap,
+        "Converts the first letter of each word in `string` in uppercase and 
the remaining characters in lowercase",
+        string
+    ),(
+        levenshtein,
+        "Returns the Levenshtein distance between the two given strings",
+        arg1 arg2
+    ),(
+        lower,
+        "Converts a string to lowercase.",
+        arg1
+    ),(
+        ltrim,
+        "Removes all characters, spaces by default, from the beginning of a 
string",
+        args,
+    ),(
+        octet_length,
+        "returns the number of bytes of a string",
+        args
+    ),(
+        overlay,
+        "replace the substring of string that starts at the start'th character 
and extends for count characters with new substring",
+        args,
+    ),(
+        repeat,
+        "Repeats the `string` to `n` times",
+        string n
+    ),(
+        replace,
+        "Replaces all occurrences of `from` with `to` in the `string`",
+        string from to
+    ),(
+        rtrim,
+        "Removes all characters, spaces by default, from the end of a string",
+        args,
+    ),(
+        split_part,
+        "Splits a string based on a delimiter and picks out the desired field 
based on the index.",
+        string delimiter index
+    ),(
+        starts_with,
+        "Returns true if string starts with prefix.",
+        arg1 arg2
+    ),(
+        to_hex,
+        "Converts an integer to a hexadecimal string.",
+        arg1
+    ),(
+        upper,
+        "Converts a string to uppercase.",
+        arg1
+    ),(
+        uuid,
+        "returns uuid v4 as a string value",
+    ));
 
     #[doc = "Removes all characters, spaces by default, from both sides of a 
string"]
-    pub fn btrim(args: Vec<Expr>) -> Expr {
+    pub fn trim(args: Vec<Expr>) -> Expr {
         super::btrim().call(args)
     }
 
-    #[doc = "Converts the Unicode code point to a UTF8 character"]
-    pub fn chr(arg: Expr) -> Expr {
-        super::chr().call(vec![arg])
-    }
-
-    #[doc = "Concatenates the text representations of all the arguments. NULL 
arguments are ignored"]
-    pub fn concat(args: Vec<Expr>) -> Expr {
-        super::concat().call(args)
-    }
-
     #[doc = "Concatenates all but the first argument, with separators. The 
first argument is used as the separator string, and should not be NULL. Other 
NULL arguments are ignored."]
     pub fn concat_ws(delimiter: Expr, args: Vec<Expr>) -> Expr {
         let mut args = args;
         args.insert(0, delimiter);
         super::concat_ws().call(args)
     }
-
-    #[doc = "Returns true if the `string` ends with the `suffix`, false 
otherwise."]
-    pub fn ends_with(string: Expr, suffix: Expr) -> Expr {
-        super::ends_with().call(vec![string, suffix])
-    }
-
-    #[doc = "Converts the first letter of each word in `string` in uppercase 
and the remaining characters in lowercase"]
-    pub fn initcap(string: Expr) -> Expr {
-        super::initcap().call(vec![string])
-    }
-
-    #[doc = "Returns the Levenshtein distance between the two given strings"]
-    pub fn levenshtein(arg1: Expr, arg2: Expr) -> Expr {
-        super::levenshtein().call(vec![arg1, arg2])
-    }
-
-    #[doc = "Converts a string to lowercase."]
-    pub fn lower(arg1: Expr) -> Expr {
-        super::lower().call(vec![arg1])
-    }
-
-    #[doc = "Removes all characters, spaces by default, from the beginning of 
a string"]
-    pub fn ltrim(args: Vec<Expr>) -> Expr {
-        super::ltrim().call(args)
-    }
-
-    #[doc = "returns the number of bytes of a string"]
-    pub fn octet_length(args: Expr) -> Expr {
-        super::octet_length().call(vec![args])
-    }
-
-    #[doc = "replace the substring of string that starts at the start'th 
character and extends for count characters with new substring"]
-    pub fn overlay(args: Vec<Expr>) -> Expr {
-        super::overlay().call(args)
-    }
-
-    #[doc = "Repeats the `string` to `n` times"]
-    pub fn repeat(string: Expr, n: Expr) -> Expr {
-        super::repeat().call(vec![string, n])
-    }
-
-    #[doc = "Replaces all occurrences of `from` with `to` in the `string`"]
-    pub fn replace(string: Expr, from: Expr, to: Expr) -> Expr {
-        super::replace().call(vec![string, from, to])
-    }
-
-    #[doc = "Removes all characters, spaces by default, from the end of a 
string"]
-    pub fn rtrim(args: Vec<Expr>) -> Expr {
-        super::rtrim().call(args)
-    }
-
-    #[doc = "Splits a string based on a delimiter and picks out the desired 
field based on the index."]
-    pub fn split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr {
-        super::split_part().call(vec![string, delimiter, index])
-    }
-
-    #[doc = "Returns true if string starts with prefix."]
-    pub fn starts_with(arg1: Expr, arg2: Expr) -> Expr {
-        super::starts_with().call(vec![arg1, arg2])
-    }
-
-    #[doc = "Converts an integer to a hexadecimal string."]
-    pub fn to_hex(arg1: Expr) -> Expr {
-        super::to_hex().call(vec![arg1])
-    }
-
-    #[doc = "Removes all characters, spaces by default, from both sides of a 
string"]
-    pub fn trim(args: Vec<Expr>) -> Expr {
-        super::btrim().call(args)
-    }
-
-    #[doc = "Converts a string to uppercase."]
-    pub fn upper(arg1: Expr) -> Expr {
-        super::upper().call(vec![arg1])
-    }
-
-    #[doc = "returns uuid v4 as a string value"]
-    pub fn uuid() -> Expr {
-        super::uuid().call(vec![])
-    }
 }
 
 ///   Return a list of all functions in this package
diff --git a/datafusion/functions/src/unicode/mod.rs 
b/datafusion/functions/src/unicode/mod.rs
index 5a8e953bc1..9e8c07cd36 100644
--- a/datafusion/functions/src/unicode/mod.rs
+++ b/datafusion/functions/src/unicode/mod.rs
@@ -47,27 +47,68 @@ make_udf_function!(reverse::ReverseFunc, REVERSE, reverse);
 make_udf_function!(rpad::RPadFunc, RPAD, rpad);
 make_udf_function!(strpos::StrposFunc, STRPOS, strpos);
 make_udf_function!(substr::SubstrFunc, SUBSTR, substr);
+make_udf_function!(substr::SubstrFunc, SUBSTRING, substring);
 make_udf_function!(substrindex::SubstrIndexFunc, SUBSTR_INDEX, substr_index);
 make_udf_function!(translate::TranslateFunc, TRANSLATE, translate);
 
 pub mod expr_fn {
     use datafusion_expr::Expr;
 
+    export_functions!((
+        character_length,
+        "the number of characters in the `string`",
+        string
+    ),(
+        lpad,
+        "fill up a string to the length by prepending the characters",
+        args,
+    ),(
+        rpad,
+        "fill up a string to the length by appending the characters",
+        args,
+    ),(
+        reverse,
+        "reverses the `string`",
+        string
+    ),(
+        substr,
+        "substring from the `position` to the end",
+        string position
+    ),(
+        substr_index,
+        "Returns the substring from str before count occurrences of the delimiter",
+        string delimiter count
+    ),(
+        strpos,
+        "finds the position from where the `substring` matches the `string`",
+        string substring
+    ),(
+        substring,
+        "substring from the `position` with `length` characters",
+        string position length
+    ),(
+        translate,
+        "replaces the characters in `from` with the counterpart in `to`",
+        string from to
+    ),(
+        right,
+        "returns the last `n` characters in the `string`",
+        string n
+    ),(
+        left,
+        "returns the first `n` characters in the `string`",
+        string n
+    ),(
+        find_in_set,
+        "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings",
+        string strlist
+    ));
+
     #[doc = "the number of characters in the `string`"]
     pub fn char_length(string: Expr) -> Expr {
         character_length(string)
     }
 
-    #[doc = "the number of characters in the `string`"]
-    pub fn character_length(string: Expr) -> Expr {
-        super::character_length().call(vec![string])
-    }
-
-    #[doc = "Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings"]
-    pub fn find_in_set(string: Expr, strlist: Expr) -> Expr {
-        super::find_in_set().call(vec![string, strlist])
-    }
-
     #[doc = "finds the position from where the `substring` matches the `string`"]
     pub fn instr(string: Expr, substring: Expr) -> Expr {
         strpos(string, substring)
@@ -78,60 +119,10 @@ pub mod expr_fn {
         character_length(string)
     }
 
-    #[doc = "returns the first `n` characters in the `string`"]
-    pub fn left(string: Expr, n: Expr) -> Expr {
-        super::left().call(vec![string, n])
-    }
-
-    #[doc = "fill up a string to the length by prepending the characters"]
-    pub fn lpad(args: Vec<Expr>) -> Expr {
-        super::lpad().call(args)
-    }
-
     #[doc = "finds the position from where the `substring` matches the `string`"]
     pub fn position(string: Expr, substring: Expr) -> Expr {
         strpos(string, substring)
     }
-
-    #[doc = "reverses the `string`"]
-    pub fn reverse(string: Expr) -> Expr {
-        super::reverse().call(vec![string])
-    }
-
-    #[doc = "returns the last `n` characters in the `string`"]
-    pub fn right(string: Expr, n: Expr) -> Expr {
-        super::right().call(vec![string, n])
-    }
-
-    #[doc = "fill up a string to the length by appending the characters"]
-    pub fn rpad(args: Vec<Expr>) -> Expr {
-        super::rpad().call(args)
-    }
-
-    #[doc = "finds the position from where the `substring` matches the `string`"]
-    pub fn strpos(string: Expr, substring: Expr) -> Expr {
-        super::strpos().call(vec![string, substring])
-    }
-
-    #[doc = "substring from the `position` to the end"]
-    pub fn substr(string: Expr, position: Expr) -> Expr {
-        super::substr().call(vec![string, position])
-    }
-
-    #[doc = "substring from the `position` with `length` characters"]
-    pub fn substring(string: Expr, position: Expr, length: Expr) -> Expr {
-        super::substr().call(vec![string, position, length])
-    }
-
-    #[doc = "Returns the substring from str before count occurrences of the delimiter"]
-    pub fn substr_index(string: Expr, delimiter: Expr, count: Expr) -> Expr {
-        super::substr_index().call(vec![string, delimiter, count])
-    }
-
-    #[doc = "replaces the characters in `from` with the counterpart in `to`"]
-    pub fn translate(string: Expr, from: Expr, to: Expr) -> Expr {
-        super::translate().call(vec![string, from, to])
-    }
 }
 
 ///   Return a list of all functions in this package


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Reduce code repetition in `datafusion/functions` mod files (#10700)

Reply via email to