This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 4c9e7876ea Move all `crypto` related functions to
`datafusion-functions` (#9590)
4c9e7876ea is described below
commit 4c9e7876ea88b074d588805f08ddd558977185b8
Author: Lordworms <[email protected]>
AuthorDate: Wed Mar 13 12:43:07 2024 -0500
Move all `crypto` related functions to `datafusion-functions` (#9590)
* port digest
* port digest
* port other crypto functions
* fix
* fix prost
* fix
* fix
* fix
* fix
* fix
* fix lock
* remove unused
* fix
* fix clippy
* remove useless
* fix clippy
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion-cli/Cargo.lock | 4 +
datafusion/expr/src/built_in_function.rs | 73 ----
datafusion/expr/src/expr_fn.rs | 29 +-
datafusion/functions/Cargo.toml | 16 +-
.../src/crypto/basic.rs} | 418 +++++++++++----------
datafusion/functions/src/crypto/digest.rs | 66 ++++
datafusion/functions/src/crypto/md5.rs | 81 ++++
datafusion/functions/src/crypto/mod.rs | 57 +++
datafusion/functions/src/crypto/sha224.rs | 60 +++
datafusion/functions/src/crypto/sha256.rs | 60 +++
datafusion/functions/src/crypto/sha384.rs | 60 +++
datafusion/functions/src/crypto/sha512.rs | 60 +++
datafusion/functions/src/lib.rs | 9 +-
datafusion/physical-expr/src/functions.rs | 299 +--------------
datafusion/physical-expr/src/lib.rs | 2 -
datafusion/proto/proto/datafusion.proto | 12 +-
datafusion/proto/src/generated/pbjson.rs | 18 -
datafusion/proto/src/generated/prost.rs | 24 +-
datafusion/proto/src/logical_plan/from_proto.rs | 37 +-
datafusion/proto/src/logical_plan/to_proto.rs | 6 -
20 files changed, 702 insertions(+), 689 deletions(-)
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index a0f68c76e4..578f185247 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1249,6 +1249,8 @@ dependencies = [
"arrow",
"arrow-array",
"base64 0.22.0",
+ "blake2",
+ "blake3",
"chrono",
"datafusion-common",
"datafusion-execution",
@@ -1257,7 +1259,9 @@ dependencies = [
"hex",
"itertools",
"log",
+ "md-5",
"regex",
+ "sha2",
]
[[package]]
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 0593ed4703..3bb004d6f5 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -60,8 +60,6 @@ pub enum BuiltinScalarFunction {
Cosh,
/// degrees
Degrees,
- /// Digest
- Digest,
/// exp
Exp,
/// factorial
@@ -168,8 +166,6 @@ pub enum BuiltinScalarFunction {
Lower,
/// ltrim
Ltrim,
- /// md5
- MD5,
/// octet_length
OctetLength,
/// random
@@ -186,14 +182,6 @@ pub enum BuiltinScalarFunction {
Rpad,
/// rtrim
Rtrim,
- /// sha224
- SHA224,
- /// sha256
- SHA256,
- /// sha384
- SHA384,
- /// Sha512
- SHA512,
/// split_part
SplitPart,
/// starts_with
@@ -337,7 +325,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Lpad => Volatility::Immutable,
BuiltinScalarFunction::Lower => Volatility::Immutable,
BuiltinScalarFunction::Ltrim => Volatility::Immutable,
- BuiltinScalarFunction::MD5 => Volatility::Immutable,
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
BuiltinScalarFunction::Radians => Volatility::Immutable,
BuiltinScalarFunction::Repeat => Volatility::Immutable,
@@ -346,11 +333,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Right => Volatility::Immutable,
BuiltinScalarFunction::Rpad => Volatility::Immutable,
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
- BuiltinScalarFunction::SHA224 => Volatility::Immutable,
- BuiltinScalarFunction::SHA256 => Volatility::Immutable,
- BuiltinScalarFunction::SHA384 => Volatility::Immutable,
- BuiltinScalarFunction::SHA512 => Volatility::Immutable,
- BuiltinScalarFunction::Digest => Volatility::Immutable,
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
BuiltinScalarFunction::Strpos => Volatility::Immutable,
@@ -467,7 +449,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Ltrim => {
utf8_to_str_type(&input_expr_types[0], "ltrim")
}
- BuiltinScalarFunction::MD5 =>
utf8_to_str_type(&input_expr_types[0], "md5"),
BuiltinScalarFunction::OctetLength => {
utf8_to_int_type(&input_expr_types[0], "octet_length")
}
@@ -490,21 +471,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rtrim => {
utf8_to_str_type(&input_expr_types[0], "rtrim")
}
- BuiltinScalarFunction::SHA224 => {
- utf8_or_binary_to_binary_type(&input_expr_types[0], "sha224")
- }
- BuiltinScalarFunction::SHA256 => {
- utf8_or_binary_to_binary_type(&input_expr_types[0], "sha256")
- }
- BuiltinScalarFunction::SHA384 => {
- utf8_or_binary_to_binary_type(&input_expr_types[0], "sha384")
- }
- BuiltinScalarFunction::SHA512 => {
- utf8_or_binary_to_binary_type(&input_expr_types[0], "sha512")
- }
- BuiltinScalarFunction::Digest => {
- utf8_or_binary_to_binary_type(&input_expr_types[0], "digest")
- }
BuiltinScalarFunction::SplitPart => {
utf8_to_str_type(&input_expr_types[0], "split_part")
}
@@ -653,15 +619,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Coalesce => {
Signature::variadic_equal(self.volatility())
}
- BuiltinScalarFunction::SHA224
- | BuiltinScalarFunction::SHA256
- | BuiltinScalarFunction::SHA384
- | BuiltinScalarFunction::SHA512
- | BuiltinScalarFunction::MD5 => Signature::uniform(
- 1,
- vec![Utf8, LargeUtf8, Binary, LargeBinary],
- self.volatility(),
- ),
BuiltinScalarFunction::Ascii
| BuiltinScalarFunction::BitLength
| BuiltinScalarFunction::CharacterLength
@@ -736,15 +693,6 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
- BuiltinScalarFunction::Digest => Signature::one_of(
- vec![
- Exact(vec![Utf8, Utf8]),
- Exact(vec![LargeUtf8, Utf8]),
- Exact(vec![Binary, Utf8]),
- Exact(vec![LargeBinary, Utf8]),
- ],
- self.volatility(),
- ),
BuiltinScalarFunction::SplitPart => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8, Int64]),
@@ -1007,12 +955,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ToChar => &["to_char", "date_format"],
// hashing functions
- BuiltinScalarFunction::Digest => &["digest"],
- BuiltinScalarFunction::MD5 => &["md5"],
- BuiltinScalarFunction::SHA224 => &["sha224"],
- BuiltinScalarFunction::SHA256 => &["sha256"],
- BuiltinScalarFunction::SHA384 => &["sha384"],
- BuiltinScalarFunction::SHA512 => &["sha512"],
BuiltinScalarFunction::ArrayElement => &[
"array_element",
"array_extract",
@@ -1120,21 +1062,6 @@ get_optimal_return_type!(utf8_to_str_type,
DataType::LargeUtf8, DataType::Utf8);
// `utf8_to_int_type`: returns either a Int32 or Int64 based on the input type
size.
get_optimal_return_type!(utf8_to_int_type, DataType::Int64, DataType::Int32);
-fn utf8_or_binary_to_binary_type(arg_type: &DataType, name: &str) ->
Result<DataType> {
- Ok(match arg_type {
- DataType::LargeUtf8
- | DataType::Utf8
- | DataType::Binary
- | DataType::LargeBinary => DataType::Binary,
- DataType::Null => DataType::Null,
- _ => {
- return plan_err!(
- "The {name:?} function can only accept strings or binary
arrays."
- );
- }
- })
-}
-
#[cfg(test)]
mod tests {
use super::*;
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 8212f75583..b0b9056e52 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -706,7 +706,6 @@ scalar_expr!(
code_point,
"converts the Unicode code point to a UTF8 character"
);
-scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of
`input`, using the `algorithm`");
scalar_expr!(InitCap, initcap, string, "converts the first letter of each word
in `string` in uppercase and the remaining characters in lowercase");
scalar_expr!(Left, left, string n, "returns the first `n` characters in the
`string`");
scalar_expr!(Lower, lower, string, "convert the string to lower case");
@@ -716,7 +715,6 @@ scalar_expr!(
string,
"removes all characters, spaces by default, from the beginning of a string"
);
-scalar_expr!(MD5, md5, string, "returns the MD5 hash of a string");
scalar_expr!(
OctetLength,
octet_length,
@@ -733,10 +731,6 @@ scalar_expr!(
string,
"removes all characters, spaces by default, from the end of a string"
);
-scalar_expr!(SHA224, sha224, string, "SHA-224 hash");
-scalar_expr!(SHA256, sha256, string, "SHA-256 hash");
-scalar_expr!(SHA384, sha384, string, "SHA-384 hash");
-scalar_expr!(SHA512, sha512, string, "SHA-512 hash");
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string
based on a delimiter and picks out the desired field based on the index.");
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string`
starts with the `prefix`");
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends
with the `suffix`");
@@ -1112,7 +1106,7 @@ pub fn call_fn(name: impl AsRef<str>, args: Vec<Expr>) ->
Result<Expr> {
#[cfg(test)]
mod test {
use super::*;
- use crate::{lit, ScalarFunctionDefinition};
+ use crate::ScalarFunctionDefinition;
#[test]
fn filter_is_null_and_is_not_null() {
@@ -1216,7 +1210,6 @@ mod test {
test_nary_scalar_expr!(Btrim, btrim, string, characters);
test_scalar_expr!(CharacterLength, character_length, string);
test_scalar_expr!(Chr, chr, string);
- test_scalar_expr!(Digest, digest, string, algorithm);
test_scalar_expr!(Gcd, gcd, arg_1, arg_2);
test_scalar_expr!(Lcm, lcm, arg_1, arg_2);
test_scalar_expr!(InitCap, initcap, string);
@@ -1225,7 +1218,6 @@ mod test {
test_nary_scalar_expr!(Lpad, lpad, string, count);
test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
test_scalar_expr!(Ltrim, ltrim, string);
- test_scalar_expr!(MD5, md5, string);
test_scalar_expr!(OctetLength, octet_length, string);
test_scalar_expr!(Replace, replace, string, from, to);
test_scalar_expr!(Repeat, repeat, string, count);
@@ -1234,10 +1226,6 @@ mod test {
test_nary_scalar_expr!(Rpad, rpad, string, count);
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
test_scalar_expr!(Rtrim, rtrim, string);
- test_scalar_expr!(SHA224, sha224, string);
- test_scalar_expr!(SHA256, sha256, string);
- test_scalar_expr!(SHA384, sha384, string);
- test_scalar_expr!(SHA512, sha512, string);
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
test_scalar_expr!(StartsWith, starts_with, string, characters);
test_scalar_expr!(EndsWith, ends_with, string, characters);
@@ -1281,19 +1269,4 @@ mod test {
unreachable!();
}
}
-
- #[test]
- fn digest_function_definitions() {
- if let Expr::ScalarFunction(ScalarFunction {
- func_def: ScalarFunctionDefinition::BuiltIn(fun),
- args,
- }) = digest(col("tableA.a"), lit("md5"))
- {
- let name = BuiltinScalarFunction::Digest;
- assert_eq!(name, fun);
- assert_eq!(2, args.len());
- } else {
- unreachable!();
- }
- }
}
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 0a1215e246..92c80208e3 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -34,14 +34,21 @@ core_expressions = []
# enable datetime functions
datetime_expressions = []
# Enable encoding by default so the doctests work. In general don't
automatically enable all packages.
-default = ["core_expressions", "datetime_expressions", "encoding_expressions",
"math_expressions", "regex_expressions"]
+default = [
+ "core_expressions",
+ "datetime_expressions",
+ "encoding_expressions",
+ "math_expressions",
+ "regex_expressions",
+ "crypto_expressions",
+]
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
# enable math functions
math_expressions = []
# enable regular expressions
regex_expressions = ["regex"]
-
+crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
[lib]
name = "datafusion_functions"
path = "src/lib.rs"
@@ -52,6 +59,8 @@ path = "src/lib.rs"
arrow = { workspace = true }
arrow-array = { workspace = true }
base64 = { version = "0.22", optional = true }
+blake2 = { version = "^0.10.2", optional = true }
+blake3 = { version = "1.0", optional = true }
chrono = { workspace = true }
datafusion-common = { workspace = true }
datafusion-execution = { workspace = true }
@@ -60,8 +69,9 @@ datafusion-physical-expr = { workspace = true,
default-features = true }
hex = { version = "0.4", optional = true }
itertools = { workspace = true }
log = { workspace = true }
+md-5 = { version = "^0.10.0", optional = true }
regex = { version = "1.8", optional = true }
-
+sha2 = { version = "^0.10.1", optional = true }
[dev-dependencies]
criterion = "0.5"
rand = { workspace = true }
diff --git a/datafusion/physical-expr/src/crypto_expressions.rs
b/datafusion/functions/src/crypto/basic.rs
similarity index 90%
rename from datafusion/physical-expr/src/crypto_expressions.rs
rename to datafusion/functions/src/crypto/basic.rs
index 3ff3bc83f2..716afd84a9 100644
--- a/datafusion/physical-expr/src/crypto_expressions.rs
+++ b/datafusion/functions/src/crypto/basic.rs
@@ -15,35 +15,90 @@
// specific language governing permissions and limitations
// under the License.
-//! Crypto expressions
+//! "crypto" DataFusion functions
-use arrow::{
- array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait, StringArray},
- datatypes::DataType,
-};
+use arrow::array::StringArray;
+use arrow::array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait};
+use arrow::datatypes::DataType;
use blake2::{Blake2b512, Blake2s256, Digest};
use blake3::Hasher as Blake3;
+use datafusion_common::cast::as_binary_array;
+
+use datafusion_common::plan_err;
use datafusion_common::{
- cast::{as_binary_array, as_generic_binary_array, as_generic_string_array},
- plan_err,
+ cast::{as_generic_binary_array, as_generic_string_array},
+ exec_err, internal_err, DataFusionError, Result, ScalarValue,
};
-use datafusion_common::{exec_err, ScalarValue};
-use datafusion_common::{internal_err, DataFusionError, Result};
use datafusion_expr::ColumnarValue;
use md5::Md5;
use sha2::{Sha224, Sha256, Sha384, Sha512};
-use std::fmt::Write;
+use std::fmt::{self, Write};
+use std::str::FromStr;
use std::sync::Arc;
-use std::{fmt, str::FromStr};
-/// Digest algorithms.
-///
-/// Note that by default all digest algorithms return BinaryArray or Binary
scalar data.
-/// However md5 when invoked by its name (rather than digest(value, 'md5'))
would return
-/// hex encoded utf8 values, due to historical reasons. You are advised to
prefer to use
-/// digest(utf8, utf8) function.
+macro_rules! define_digest_function {
+ ($NAME: ident, $METHOD: ident, $DOC: expr) => {
+ #[doc = $DOC]
+ pub fn $NAME(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 1 {
+ return exec_err!(
+ "{:?} args were supplied but {} takes exactly one
argument",
+ args.len(),
+ DigestAlgorithm::$METHOD.to_string()
+ );
+ }
+ digest_process(&args[0], DigestAlgorithm::$METHOD)
+ }
+ };
+}
+define_digest_function!(
+ sha224,
+ Sha224,
+ "computes sha224 hash digest of the given input"
+);
+define_digest_function!(
+ sha256,
+ Sha256,
+ "computes sha256 hash digest of the given input"
+);
+define_digest_function!(
+ sha384,
+ Sha384,
+ "computes sha384 hash digest of the given input"
+);
+define_digest_function!(
+ sha512,
+ Sha512,
+ "computes sha512 hash digest of the given input"
+);
+define_digest_function!(
+ blake2b,
+ Blake2b,
+ "computes blake2b hash digest of the given input"
+);
+define_digest_function!(
+ blake2s,
+ Blake2s,
+ "computes blake2s hash digest of the given input"
+);
+define_digest_function!(
+ blake3,
+ Blake3,
+ "computes blake3 hash digest of the given input"
+);
+
+macro_rules! digest_to_scalar {
+ ($METHOD: ident, $INPUT:expr) => {{
+ ScalarValue::Binary($INPUT.as_ref().map(|v| {
+ let mut digest = $METHOD::default();
+ digest.update(v);
+ digest.finalize().as_slice().to_vec()
+ }))
+ }};
+}
+
#[derive(Debug, Copy, Clone)]
-enum DigestAlgorithm {
+pub enum DigestAlgorithm {
Md5,
Sha224,
Sha256,
@@ -54,36 +109,123 @@ enum DigestAlgorithm {
Blake3,
}
-fn digest_process(
- value: &ColumnarValue,
- digest_algorithm: DigestAlgorithm,
-) -> Result<ColumnarValue> {
- match value {
- ColumnarValue::Array(a) => match a.data_type() {
- DataType::Utf8 =>
digest_algorithm.digest_utf8_array::<i32>(a.as_ref()),
- DataType::LargeUtf8 =>
digest_algorithm.digest_utf8_array::<i64>(a.as_ref()),
- DataType::Binary =>
digest_algorithm.digest_binary_array::<i32>(a.as_ref()),
- DataType::LargeBinary => {
- digest_algorithm.digest_binary_array::<i64>(a.as_ref())
- }
- other => exec_err!(
- "Unsupported data type {other:?} for function
{digest_algorithm}"
- ),
- },
+/// Digest computes a binary hash of the given data, accepts Utf8, LargeUtf8,
Binary or LargeBinary and returns a [`ColumnarValue`].
+/// Second argument is the algorithm to use.
+/// Supported algorithms are md5, sha224, sha256, sha384, sha512, blake2b, blake2s and blake3.
+pub fn digest(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 2 {
+ return exec_err!(
+ "{:?} args were supplied but digest takes exactly two arguments",
+ args.len()
+ );
+ }
+ let digest_algorithm = match &args[1] {
ColumnarValue::Scalar(scalar) => match scalar {
- ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
- Ok(digest_algorithm
- .digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
+ ScalarValue::Utf8(Some(method)) |
ScalarValue::LargeUtf8(Some(method)) => {
+ method.parse::<DigestAlgorithm>()
}
- ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) =>
Ok(digest_algorithm
- .digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
- other => exec_err!(
- "Unsupported data type {other:?} for function
{digest_algorithm}"
- ),
+ other => exec_err!("Unsupported data type {other:?} for function
digest"),
},
+ ColumnarValue::Array(_) => {
+ internal_err!("Digest using dynamically decided method is not yet
supported")
+ }
+ }?;
+ digest_process(&args[0], digest_algorithm)
+}
+impl FromStr for DigestAlgorithm {
+ type Err = DataFusionError;
+ fn from_str(name: &str) -> Result<DigestAlgorithm> {
+ Ok(match name {
+ "md5" => Self::Md5,
+ "sha224" => Self::Sha224,
+ "sha256" => Self::Sha256,
+ "sha384" => Self::Sha384,
+ "sha512" => Self::Sha512,
+ "blake2b" => Self::Blake2b,
+ "blake2s" => Self::Blake2s,
+ "blake3" => Self::Blake3,
+ _ => {
+ let options = [
+ Self::Md5,
+ Self::Sha224,
+ Self::Sha256,
+ Self::Sha384,
+ Self::Sha512,
+ Self::Blake2s,
+ Self::Blake2b,
+ Self::Blake3,
+ ]
+ .iter()
+ .map(|i| i.to_string())
+ .collect::<Vec<_>>()
+ .join(", ");
+ return plan_err!(
+ "There is no built-in digest algorithm named '{name}',
currently supported algorithms are: {options}"
+ );
+ }
+ })
}
}
+impl fmt::Display for DigestAlgorithm {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", format!("{self:?}").to_lowercase())
+ }
+}
+/// computes md5 hash digest of the given input
+pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.len() != 1 {
+ return exec_err!(
+ "{:?} args were supplied but {} takes exactly one argument",
+ args.len(),
+ DigestAlgorithm::Md5
+ );
+ }
+ let value = digest_process(&args[0], DigestAlgorithm::Md5)?;
+ // md5 requires special handling because of its unique utf8 return type
+ Ok(match value {
+ ColumnarValue::Array(array) => {
+ let binary_array = as_binary_array(&array)?;
+ let string_array: StringArray = binary_array
+ .iter()
+ .map(|opt| opt.map(hex_encode::<_>))
+ .collect();
+ ColumnarValue::Array(Arc::new(string_array))
+ }
+ ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
+ ColumnarValue::Scalar(ScalarValue::Utf8(opt.map(hex_encode::<_>)))
+ }
+ _ => return exec_err!("Impossibly got invalid results from digest"),
+ })
+}
+/// this function exists so that we do not need to pull in the crate hex. it
is only used by the md5
+/// function above
+#[inline]
+fn hex_encode<T: AsRef<[u8]>>(data: T) -> String {
+ let mut s = String::with_capacity(data.as_ref().len() * 2);
+ for b in data.as_ref() {
+ // Writing to a string never errors, so we can unwrap here.
+ write!(&mut s, "{b:02x}").unwrap();
+ }
+ s
+}
+pub fn utf8_or_binary_to_binary_type(
+ arg_type: &DataType,
+ name: &str,
+) -> Result<DataType> {
+ Ok(match arg_type {
+ DataType::LargeUtf8
+ | DataType::Utf8
+ | DataType::Binary
+ | DataType::LargeBinary => DataType::Binary,
+ DataType::Null => DataType::Null,
+ _ => {
+ return plan_err!(
+ "The {name:?} function can only accept strings or binary
arrays."
+ );
+ }
+ })
+}
macro_rules! digest_to_array {
($METHOD:ident, $INPUT:expr) => {{
let binary_array: BinaryArray = $INPUT
@@ -99,20 +241,9 @@ macro_rules! digest_to_array {
Arc::new(binary_array)
}};
}
-
-macro_rules! digest_to_scalar {
- ($METHOD: ident, $INPUT:expr) => {{
- ScalarValue::Binary($INPUT.as_ref().map(|v| {
- let mut digest = $METHOD::default();
- digest.update(v);
- digest.finalize().as_slice().to_vec()
- }))
- }};
-}
-
impl DigestAlgorithm {
/// digest an optional string to its hash value, null values are returned
as is
- fn digest_scalar(self, value: Option<&[u8]>) -> ColumnarValue {
+ pub fn digest_scalar(self, value: Option<&[u8]>) -> ColumnarValue {
ColumnarValue::Scalar(match self {
Self::Md5 => digest_to_scalar!(Md5, value),
Self::Sha224 => digest_to_scalar!(Sha224, value),
@@ -130,7 +261,7 @@ impl DigestAlgorithm {
}
/// digest a binary array to their hash values
- fn digest_binary_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+ pub fn digest_binary_array<T>(self, value: &dyn Array) ->
Result<ColumnarValue>
where
T: OffsetSizeTrait,
{
@@ -161,7 +292,7 @@ impl DigestAlgorithm {
}
/// digest a string array to their hash values
- fn digest_utf8_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+ pub fn digest_utf8_array<T>(self, value: &dyn Array) ->
Result<ColumnarValue>
where
T: OffsetSizeTrait,
{
@@ -191,159 +322,32 @@ impl DigestAlgorithm {
Ok(ColumnarValue::Array(array))
}
}
-
-impl fmt::Display for DigestAlgorithm {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}", format!("{self:?}").to_lowercase())
- }
-}
-
-impl FromStr for DigestAlgorithm {
- type Err = DataFusionError;
- fn from_str(name: &str) -> Result<DigestAlgorithm> {
- Ok(match name {
- "md5" => Self::Md5,
- "sha224" => Self::Sha224,
- "sha256" => Self::Sha256,
- "sha384" => Self::Sha384,
- "sha512" => Self::Sha512,
- "blake2b" => Self::Blake2b,
- "blake2s" => Self::Blake2s,
- "blake3" => Self::Blake3,
- _ => {
- let options = [
- Self::Md5,
- Self::Sha224,
- Self::Sha256,
- Self::Sha384,
- Self::Sha512,
- Self::Blake2s,
- Self::Blake2b,
- Self::Blake3,
- ]
- .iter()
- .map(|i| i.to_string())
- .collect::<Vec<_>>()
- .join(", ");
- return plan_err!(
- "There is no built-in digest algorithm named '{name}',
currently supported algorithms are: {options}"
- );
- }
- })
- }
-}
-
-macro_rules! define_digest_function {
- ($NAME: ident, $METHOD: ident, $DOC: expr) => {
- #[doc = $DOC]
- pub fn $NAME(args: &[ColumnarValue]) -> Result<ColumnarValue> {
- if args.len() != 1 {
- return exec_err!(
- "{:?} args were supplied but {} takes exactly one
argument",
- args.len(),
- DigestAlgorithm::$METHOD.to_string()
- );
+pub fn digest_process(
+ value: &ColumnarValue,
+ digest_algorithm: DigestAlgorithm,
+) -> Result<ColumnarValue> {
+ match value {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8 =>
digest_algorithm.digest_utf8_array::<i32>(a.as_ref()),
+ DataType::LargeUtf8 =>
digest_algorithm.digest_utf8_array::<i64>(a.as_ref()),
+ DataType::Binary =>
digest_algorithm.digest_binary_array::<i32>(a.as_ref()),
+ DataType::LargeBinary => {
+ digest_algorithm.digest_binary_array::<i64>(a.as_ref())
}
- digest_process(&args[0], DigestAlgorithm::$METHOD)
- }
- };
-}
-
-/// this function exists so that we do not need to pull in the crate hex. it
is only used by md5
-/// function below
-#[inline]
-fn hex_encode<T: AsRef<[u8]>>(data: T) -> String {
- let mut s = String::with_capacity(data.as_ref().len() * 2);
- for b in data.as_ref() {
- // Writing to a string never errors, so we can unwrap here.
- write!(&mut s, "{b:02x}").unwrap();
- }
- s
-}
-
-/// computes md5 hash digest of the given input
-pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
- if args.len() != 1 {
- return exec_err!(
- "{:?} args were supplied but {} takes exactly one argument",
- args.len(),
- DigestAlgorithm::Md5
- );
- }
- let value = digest_process(&args[0], DigestAlgorithm::Md5)?;
- // md5 requires special handling because of its unique utf8 return type
- Ok(match value {
- ColumnarValue::Array(array) => {
- let binary_array = as_binary_array(&array)?;
- let string_array: StringArray = binary_array
- .iter()
- .map(|opt| opt.map(hex_encode::<_>))
- .collect();
- ColumnarValue::Array(Arc::new(string_array))
- }
- ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
- ColumnarValue::Scalar(ScalarValue::Utf8(opt.map(hex_encode::<_>)))
- }
- _ => return exec_err!("Impossibly got invalid results from digest"),
- })
-}
-
-define_digest_function!(
- sha224,
- Sha224,
- "computes sha224 hash digest of the given input"
-);
-define_digest_function!(
- sha256,
- Sha256,
- "computes sha256 hash digest of the given input"
-);
-define_digest_function!(
- sha384,
- Sha384,
- "computes sha384 hash digest of the given input"
-);
-define_digest_function!(
- sha512,
- Sha512,
- "computes sha512 hash digest of the given input"
-);
-define_digest_function!(
- blake2b,
- Blake2b,
- "computes blake2b hash digest of the given input"
-);
-define_digest_function!(
- blake2s,
- Blake2s,
- "computes blake2s hash digest of the given input"
-);
-define_digest_function!(
- blake3,
- Blake3,
- "computes blake3 hash digest of the given input"
-);
-
-/// Digest computes a binary hash of the given data, accepts Utf8 or LargeUtf8
and returns a [`ColumnarValue`].
-/// Second argument is the algorithm to use.
-/// Standard algorithms are md5, sha1, sha224, sha256, sha384 and sha512.
-pub fn digest(args: &[ColumnarValue]) -> Result<ColumnarValue> {
- if args.len() != 2 {
- return exec_err!(
- "{:?} args were supplied but digest takes exactly two arguments",
- args.len()
- );
- }
- let digest_algorithm = match &args[1] {
+ other => exec_err!(
+ "Unsupported data type {other:?} for function
{digest_algorithm}"
+ ),
+ },
ColumnarValue::Scalar(scalar) => match scalar {
- ScalarValue::Utf8(Some(method)) |
ScalarValue::LargeUtf8(Some(method)) => {
- method.parse::<DigestAlgorithm>()
+ ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
+ Ok(digest_algorithm
+ .digest_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
}
- other => exec_err!("Unsupported data type {other:?} for function
digest"),
+ ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) =>
Ok(digest_algorithm
+ .digest_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
+ other => exec_err!(
+ "Unsupported data type {other:?} for function
{digest_algorithm}"
+ ),
},
- ColumnarValue::Array(_) => {
- internal_err!("Digest using dynamically decided method is not yet
supported")
- }
- }?;
- digest_process(&args[0], digest_algorithm)
+ }
}
diff --git a/datafusion/functions/src/crypto/digest.rs
b/datafusion/functions/src/crypto/digest.rs
new file mode 100644
index 0000000000..c6556787cb
--- /dev/null
+++ b/datafusion/functions/src/crypto/digest.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use super::basic::{digest, utf8_or_binary_to_binary_type};
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::{
+ ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
+};
+use std::any::Any;
+
+#[derive(Debug)]
+pub(super) struct DigestFunc {
+ signature: Signature,
+}
+impl DigestFunc {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::one_of(
+ vec![
+ Exact(vec![Utf8, Utf8]),
+ Exact(vec![LargeUtf8, Utf8]),
+ Exact(vec![Binary, Utf8]),
+ Exact(vec![LargeBinary, Utf8]),
+ ],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+impl ScalarUDFImpl for DigestFunc {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "digest"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ utf8_or_binary_to_binary_type(&arg_types[0], self.name())
+ }
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ digest(args)
+ }
+}
diff --git a/datafusion/functions/src/crypto/md5.rs
b/datafusion/functions/src/crypto/md5.rs
new file mode 100644
index 0000000000..7b2936a379
--- /dev/null
+++ b/datafusion/functions/src/crypto/md5.rs
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use crate::crypto::basic::md5;
+use arrow::datatypes::DataType;
+use datafusion_common::{plan_err, Result};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+#[derive(Debug)]
+pub(super) struct Md5Func {
+ signature: Signature,
+}
+impl Md5Func {
+ pub fn new() -> Self {
+ use DataType::*;
+ Self {
+ signature: Signature::uniform(
+ 1,
+ vec![Utf8, LargeUtf8, Binary, LargeBinary],
+ Volatility::Immutable,
+ ),
+ }
+ }
+}
+impl ScalarUDFImpl for Md5Func {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "md5"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ use DataType::*;
+ Ok(match &arg_types[0] {
+ LargeUtf8 | LargeBinary => LargeUtf8,
+ Utf8 | Binary => Utf8,
+ Null => Null,
+ Dictionary(_, t) => match **t {
+ LargeUtf8 | LargeBinary => LargeUtf8,
+ Utf8 | Binary => Utf8,
+ Null => Null,
+ _ => {
+ return plan_err!(
+ "the md5 can only accept strings but got {:?}",
+ **t
+ );
+ }
+ },
+ other => {
+ return plan_err!(
+ "The md5 function can only accept strings. Got {other}"
+ );
+ }
+ })
+ }
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ md5(args)
+ }
+}
diff --git a/datafusion/functions/src/crypto/mod.rs
b/datafusion/functions/src/crypto/mod.rs
new file mode 100644
index 0000000000..a879fdb45b
--- /dev/null
+++ b/datafusion/functions/src/crypto/mod.rs
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+
+// `basic` holds the hashing routines that the per-function UDF wrappers in
+// the sibling modules (e.g. `md5`, `sha224`) import and delegate to.
+pub mod basic;
+pub mod digest;
+pub mod md5;
+pub mod sha224;
+pub mod sha256;
+pub mod sha384;
+pub mod sha512;
+// One `make_udf_function!` invocation per function, passing the implementing
+// type followed by an upper-case and a lower-case identifier.
+// NOTE(review): the meaning of the two identifiers is defined by the macro,
+// which is not visible here -- confirm against its definition.
+make_udf_function!(digest::DigestFunc, DIGEST, digest);
+make_udf_function!(md5::Md5Func, MD5, md5);
+make_udf_function!(sha224::SHA224Func, SHA224, sha224);
+make_udf_function!(sha256::SHA256Func, SHA256, sha256);
+make_udf_function!(sha384::SHA384Func, SHA384, sha384);
+make_udf_function!(sha512::SHA512Func, SHA512, sha512);
+// Each tuple below is (function name, argument names, description).
+// NOTE(review): presumably this macro generates the `expr_fn` module and the
+// `functions()` list that `lib.rs` refers to -- confirm against the macro.
+export_functions!((
+ digest,
+ input_arg1 input_arg2,
+ "Computes the binary hash of an expression using the specified algorithm."
+),(
+ md5,
+ input_arg,
+ "Computes an MD5 128-bit checksum for a string expression."
+),(
+ sha224,
+ input_arg1,
+ "Computes the SHA-224 hash of a binary string."
+),(
+ sha256,
+ input_arg1,
+ "Computes the SHA-256 hash of a binary string."
+),(
+ sha384,
+ input_arg1,
+ "Computes the SHA-384 hash of a binary string."
+),(
+ sha512,
+ input_arg1,
+ "Computes the SHA-512 hash of a binary string."
+));
diff --git a/datafusion/functions/src/crypto/sha224.rs
b/datafusion/functions/src/crypto/sha224.rs
new file mode 100644
index 0000000000..ef0fae97cf
--- /dev/null
+++ b/datafusion/functions/src/crypto/sha224.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use super::basic::{sha224, utf8_or_binary_to_binary_type};
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+/// Scalar UDF implementation for `sha224`.
+#[derive(Debug)]
+pub(super) struct SHA224Func {
+    /// Argument types and volatility accepted by the function.
+    signature: Signature,
+}
+
+impl SHA224Func {
+    /// Creates the UDF with an immutable, single-argument signature over
+    /// `Utf8`, `LargeUtf8`, `Binary` and `LargeBinary`.
+    pub fn new() -> Self {
+        let accepted_types = vec![
+            DataType::Utf8,
+            DataType::LargeUtf8,
+            DataType::Binary,
+            DataType::LargeBinary,
+        ];
+        let signature = Signature::uniform(1, accepted_types, Volatility::Immutable);
+        Self { signature }
+    }
+}
+impl ScalarUDFImpl for SHA224Func {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the function name, `sha224`.
+    fn name(&self) -> &str {
+        "sha224"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Resolves the output type by handing the first argument type and this
+    /// function's name to `utf8_or_binary_to_binary_type`.
+    ///
+    /// Panics if `arg_types` is empty, because the first element is indexed
+    /// directly.
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let input_type = &arg_types[0];
+        let function_name = self.name();
+        utf8_or_binary_to_binary_type(input_type, function_name)
+    }
+
+    /// Evaluates the function by forwarding `args` to `basic::sha224`.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        sha224(args)
+    }
+}
diff --git a/datafusion/functions/src/crypto/sha256.rs
b/datafusion/functions/src/crypto/sha256.rs
new file mode 100644
index 0000000000..f763f925cc
--- /dev/null
+++ b/datafusion/functions/src/crypto/sha256.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use super::basic::{sha256, utf8_or_binary_to_binary_type};
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+/// Scalar UDF implementation for `sha256`.
+#[derive(Debug)]
+pub(super) struct SHA256Func {
+    /// Argument types and volatility accepted by the function.
+    signature: Signature,
+}
+
+impl SHA256Func {
+    /// Creates the UDF with an immutable, single-argument signature over
+    /// `Utf8`, `LargeUtf8`, `Binary` and `LargeBinary`.
+    pub fn new() -> Self {
+        let accepted_types = vec![
+            DataType::Utf8,
+            DataType::LargeUtf8,
+            DataType::Binary,
+            DataType::LargeBinary,
+        ];
+        let signature = Signature::uniform(1, accepted_types, Volatility::Immutable);
+        Self { signature }
+    }
+}
+impl ScalarUDFImpl for SHA256Func {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the function name, `sha256`.
+    fn name(&self) -> &str {
+        "sha256"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Resolves the output type by handing the first argument type and this
+    /// function's name to `utf8_or_binary_to_binary_type`.
+    ///
+    /// Panics if `arg_types` is empty, because the first element is indexed
+    /// directly.
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let input_type = &arg_types[0];
+        let function_name = self.name();
+        utf8_or_binary_to_binary_type(input_type, function_name)
+    }
+
+    /// Evaluates the function by forwarding `args` to `basic::sha256`.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        sha256(args)
+    }
+}
diff --git a/datafusion/functions/src/crypto/sha384.rs
b/datafusion/functions/src/crypto/sha384.rs
new file mode 100644
index 0000000000..b382d42663
--- /dev/null
+++ b/datafusion/functions/src/crypto/sha384.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use super::basic::{sha384, utf8_or_binary_to_binary_type};
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+/// Scalar UDF implementation for `sha384`.
+#[derive(Debug)]
+pub(super) struct SHA384Func {
+    /// Argument types and volatility accepted by the function.
+    signature: Signature,
+}
+
+impl SHA384Func {
+    /// Creates the UDF with an immutable, single-argument signature over
+    /// `Utf8`, `LargeUtf8`, `Binary` and `LargeBinary`.
+    pub fn new() -> Self {
+        let accepted_types = vec![
+            DataType::Utf8,
+            DataType::LargeUtf8,
+            DataType::Binary,
+            DataType::LargeBinary,
+        ];
+        let signature = Signature::uniform(1, accepted_types, Volatility::Immutable);
+        Self { signature }
+    }
+}
+impl ScalarUDFImpl for SHA384Func {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the function name, `sha384`.
+    fn name(&self) -> &str {
+        "sha384"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Resolves the output type by handing the first argument type and this
+    /// function's name to `utf8_or_binary_to_binary_type`.
+    ///
+    /// Panics if `arg_types` is empty, because the first element is indexed
+    /// directly.
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let input_type = &arg_types[0];
+        let function_name = self.name();
+        utf8_or_binary_to_binary_type(input_type, function_name)
+    }
+
+    /// Evaluates the function by forwarding `args` to `basic::sha384`.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        sha384(args)
+    }
+}
diff --git a/datafusion/functions/src/crypto/sha512.rs
b/datafusion/functions/src/crypto/sha512.rs
new file mode 100644
index 0000000000..a852376fad
--- /dev/null
+++ b/datafusion/functions/src/crypto/sha512.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! "crypto" DataFusion functions
+use super::basic::{sha512, utf8_or_binary_to_binary_type};
+use arrow::datatypes::DataType;
+use datafusion_common::Result;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+
+/// Scalar UDF implementation for `sha512`.
+#[derive(Debug)]
+pub(super) struct SHA512Func {
+    /// Argument types and volatility accepted by the function.
+    signature: Signature,
+}
+
+impl SHA512Func {
+    /// Creates the UDF with an immutable, single-argument signature over
+    /// `Utf8`, `LargeUtf8`, `Binary` and `LargeBinary`.
+    pub fn new() -> Self {
+        let accepted_types = vec![
+            DataType::Utf8,
+            DataType::LargeUtf8,
+            DataType::Binary,
+            DataType::LargeBinary,
+        ];
+        let signature = Signature::uniform(1, accepted_types, Volatility::Immutable);
+        Self { signature }
+    }
+}
+impl ScalarUDFImpl for SHA512Func {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the function name, `sha512`.
+    fn name(&self) -> &str {
+        "sha512"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Resolves the output type by handing the first argument type and this
+    /// function's name to `utf8_or_binary_to_binary_type`.
+    ///
+    /// Panics if `arg_types` is empty, because the first element is indexed
+    /// directly.
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let input_type = &arg_types[0];
+        let function_name = self.name();
+        utf8_or_binary_to_binary_type(input_type, function_name)
+    }
+
+    /// Evaluates the function by forwarding `args` to `basic::sha512`.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        sha512(args)
+    }
+}
diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
index 1d48dcadbe..3a2eab8e5f 100644
--- a/datafusion/functions/src/lib.rs
+++ b/datafusion/functions/src/lib.rs
@@ -116,10 +116,16 @@ make_stub_package!(math, "math_expressions");
pub mod regex;
make_stub_package!(regex, "regex_expressions");
+#[cfg(feature = "crypto_expressions")]
+pub mod crypto;
+make_stub_package!(crypto, "crypto_expressions");
+
/// Fluent-style API for creating `Expr`s
pub mod expr_fn {
#[cfg(feature = "core_expressions")]
pub use super::core::expr_fn::*;
+ #[cfg(feature = "crypto_expressions")]
+ pub use super::crypto::expr_fn::*;
#[cfg(feature = "datetime_expressions")]
pub use super::datetime::expr_fn::*;
#[cfg(feature = "encoding_expressions")]
@@ -137,7 +143,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) ->
Result<()> {
.chain(datetime::functions())
.chain(encoding::functions())
.chain(math::functions())
- .chain(regex::functions());
+ .chain(regex::functions())
+ .chain(crypto::functions());
all_functions.try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 072e4ba47e..ff653192c0 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -84,51 +84,6 @@ pub fn create_physical_expr(
)))
}
-#[cfg(feature = "crypto_expressions")]
-macro_rules! invoke_if_crypto_expressions_feature_flag {
- ($FUNC:ident, $NAME:expr) => {{
- use crate::crypto_expressions;
- crypto_expressions::$FUNC
- }};
-}
-
-#[cfg(not(feature = "crypto_expressions"))]
-macro_rules! invoke_if_crypto_expressions_feature_flag {
- ($FUNC:ident, $NAME:expr) => {
- use datafusion_common::internal_err;
- |_: &[ColumnarValue]| -> Result<ColumnarValue> {
- internal_err!(
- "function {} requires compilation with feature flag:
crypto_expressions.",
- $NAME
- )
- }
- };
-}
-
-#[cfg(not(feature = "regex_expressions"))]
-macro_rules! invoke_on_array_if_regex_expressions_feature_flag {
- ($FUNC:ident, $T:tt, $NAME:expr) => {
- |_: &[ArrayRef]| -> Result<ArrayRef> {
- internal_err!(
- "function {} requires compilation with feature flag:
regex_expressions.",
- $NAME
- )
- }
- };
-}
-
-#[cfg(not(feature = "regex_expressions"))]
-macro_rules! invoke_on_columnar_value_if_regex_expressions_feature_flag {
- ($FUNC:ident, $T:tt, $NAME:expr) => {
- |_: &[ColumnarValue]| -> Result<ScalarFunctionImplementation> {
- internal_err!(
- "function {} requires compilation with feature flag:
regex_expressions.",
- $NAME
- )
- }
- };
-}
-
#[cfg(feature = "unicode_expressions")]
macro_rules! invoke_if_unicode_expressions_feature_flag {
($FUNC:ident, $T:tt, $NAME:expr) => {{
@@ -459,12 +414,6 @@ pub fn create_physical_fun(
}
other => exec_err!("Unsupported data type {other:?} for function
ltrim"),
}),
- BuiltinScalarFunction::MD5 => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(md5, "md5"))
- }
- BuiltinScalarFunction::Digest => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(digest,
"digest"))
- }
BuiltinScalarFunction::OctetLength => Arc::new(|args| match &args[0] {
ColumnarValue::Array(v) =>
Ok(ColumnarValue::Array(length(v.as_ref())?)),
ColumnarValue::Scalar(v) => match v {
@@ -545,18 +494,6 @@ pub fn create_physical_fun(
}
other => exec_err!("Unsupported data type {other:?} for function
rtrim"),
}),
- BuiltinScalarFunction::SHA224 => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(sha224,
"sha224"))
- }
- BuiltinScalarFunction::SHA256 => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(sha256,
"sha256"))
- }
- BuiltinScalarFunction::SHA384 => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(sha384,
"sha384"))
- }
- BuiltinScalarFunction::SHA512 => {
- Arc::new(invoke_if_crypto_expressions_feature_flag!(sha512,
"sha512"))
- }
BuiltinScalarFunction::SplitPart => Arc::new(|args| match
args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::split_part::<i32>)(args)
@@ -796,8 +733,8 @@ mod tests {
use crate::expressions::try_cast;
use arrow::{
array::{
- Array, ArrayRef, BinaryArray, BooleanArray, Float32Array,
Float64Array,
- Int32Array, StringArray, UInt64Array,
+ Array, ArrayRef, BooleanArray, Float32Array, Float64Array,
Int32Array,
+ StringArray, UInt64Array,
},
datatypes::Field,
record_batch::RecordBatch,
@@ -1486,44 +1423,6 @@ mod tests {
Utf8,
StringArray
);
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- MD5,
- &[lit("tom")],
- Ok(Some("34b7da764b21d298ef307d04d8152dc5")),
- &str,
- Utf8,
- StringArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- MD5,
- &[lit("")],
- Ok(Some("d41d8cd98f00b204e9800998ecf8427e")),
- &str,
- Utf8,
- StringArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- MD5,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &str,
- Utf8,
- StringArray
- );
- #[cfg(not(feature = "crypto_expressions"))]
- test_function!(
- MD5,
- &[lit("tom")],
- internal_err!(
- "function md5 requires compilation with feature flag:
crypto_expressions."
- ),
- &str,
- Utf8,
- StringArray
- );
test_function!(
OctetLength,
&[lit("chars")],
@@ -1920,200 +1819,6 @@ mod tests {
Utf8,
StringArray
);
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA224,
- &[lit("tom")],
- Ok(Some(&[
- 11u8, 246u8, 203u8, 98u8, 100u8, 156u8, 66u8, 169u8, 174u8,
56u8, 118u8,
- 171u8, 111u8, 109u8, 146u8, 173u8, 54u8, 203u8, 84u8, 20u8,
228u8, 149u8,
- 248u8, 135u8, 50u8, 146u8, 190u8, 77u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA224,
- &[lit("")],
- Ok(Some(&[
- 209u8, 74u8, 2u8, 140u8, 42u8, 58u8, 43u8, 201u8, 71u8, 97u8,
2u8, 187u8,
- 40u8, 130u8, 52u8, 196u8, 21u8, 162u8, 176u8, 31u8, 130u8,
142u8, 166u8,
- 42u8, 197u8, 179u8, 228u8, 47u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA224,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(not(feature = "crypto_expressions"))]
- test_function!(
- SHA224,
- &[lit("tom")],
- internal_err!(
- "function sha224 requires compilation with feature flag:
crypto_expressions."
- ),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA256,
- &[lit("tom")],
- Ok(Some(&[
- 225u8, 96u8, 143u8, 117u8, 197u8, 215u8, 129u8, 63u8, 61u8,
64u8, 49u8,
- 203u8, 48u8, 191u8, 183u8, 134u8, 80u8, 125u8, 152u8, 19u8,
117u8, 56u8,
- 255u8, 142u8, 18u8, 138u8, 111u8, 247u8, 78u8, 132u8, 230u8,
67u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA256,
- &[lit("")],
- Ok(Some(&[
- 227u8, 176u8, 196u8, 66u8, 152u8, 252u8, 28u8, 20u8, 154u8,
251u8, 244u8,
- 200u8, 153u8, 111u8, 185u8, 36u8, 39u8, 174u8, 65u8, 228u8,
100u8, 155u8,
- 147u8, 76u8, 164u8, 149u8, 153u8, 27u8, 120u8, 82u8, 184u8,
85u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA256,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(not(feature = "crypto_expressions"))]
- test_function!(
- SHA256,
- &[lit("tom")],
- internal_err!(
- "function sha256 requires compilation with feature flag:
crypto_expressions."
- ),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA384,
- &[lit("tom")],
- Ok(Some(&[
- 9u8, 111u8, 91u8, 104u8, 170u8, 119u8, 132u8, 142u8, 79u8,
223u8, 92u8,
- 28u8, 11u8, 53u8, 13u8, 226u8, 219u8, 250u8, 214u8, 15u8,
253u8, 124u8,
- 37u8, 217u8, 234u8, 7u8, 198u8, 193u8, 155u8, 138u8, 77u8,
85u8, 169u8,
- 24u8, 126u8, 177u8, 23u8, 197u8, 87u8, 136u8, 63u8, 88u8,
193u8, 109u8,
- 250u8, 195u8, 227u8, 67u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA384,
- &[lit("")],
- Ok(Some(&[
- 56u8, 176u8, 96u8, 167u8, 81u8, 172u8, 150u8, 56u8, 76u8,
217u8, 50u8,
- 126u8, 177u8, 177u8, 227u8, 106u8, 33u8, 253u8, 183u8, 17u8,
20u8, 190u8,
- 7u8, 67u8, 76u8, 12u8, 199u8, 191u8, 99u8, 246u8, 225u8,
218u8, 39u8,
- 78u8, 222u8, 191u8, 231u8, 111u8, 101u8, 251u8, 213u8, 26u8,
210u8,
- 241u8, 72u8, 152u8, 185u8, 91u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA384,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(not(feature = "crypto_expressions"))]
- test_function!(
- SHA384,
- &[lit("tom")],
- internal_err!(
- "function sha384 requires compilation with feature flag:
crypto_expressions."
- ),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA512,
- &[lit("tom")],
- Ok(Some(&[
- 110u8, 27u8, 155u8, 63u8, 232u8, 64u8, 104u8, 14u8, 55u8, 5u8,
31u8,
- 122u8, 213u8, 233u8, 89u8, 214u8, 243u8, 154u8, 208u8, 248u8,
136u8,
- 93u8, 133u8, 81u8, 102u8, 245u8, 92u8, 101u8, 148u8, 105u8,
211u8, 200u8,
- 183u8, 129u8, 24u8, 196u8, 74u8, 42u8, 73u8, 199u8, 45u8,
219u8, 72u8,
- 28u8, 214u8, 216u8, 115u8, 16u8, 52u8, 225u8, 28u8, 192u8,
48u8, 7u8,
- 11u8, 168u8, 67u8, 169u8, 11u8, 52u8, 149u8, 203u8, 141u8, 62u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA512,
- &[lit("")],
- Ok(Some(&[
- 207u8, 131u8, 225u8, 53u8, 126u8, 239u8, 184u8, 189u8, 241u8,
84u8, 40u8,
- 80u8, 214u8, 109u8, 128u8, 7u8, 214u8, 32u8, 228u8, 5u8, 11u8,
87u8,
- 21u8, 220u8, 131u8, 244u8, 169u8, 33u8, 211u8, 108u8, 233u8,
206u8, 71u8,
- 208u8, 209u8, 60u8, 93u8, 133u8, 242u8, 176u8, 255u8, 131u8,
24u8, 210u8,
- 135u8, 126u8, 236u8, 47u8, 99u8, 185u8, 49u8, 189u8, 71u8,
65u8, 122u8,
- 129u8, 165u8, 56u8, 50u8, 122u8, 249u8, 39u8, 218u8, 62u8
- ])),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(feature = "crypto_expressions")]
- test_function!(
- SHA512,
- &[lit(ScalarValue::Utf8(None))],
- Ok(None),
- &[u8],
- Binary,
- BinaryArray
- );
- #[cfg(not(feature = "crypto_expressions"))]
- test_function!(
- SHA512,
- &[lit("tom")],
- internal_err!(
- "function sha512 requires compilation with feature flag:
crypto_expressions."
- ),
- &[u8],
- Binary,
- BinaryArray
- );
test_function!(
SplitPart,
&[
diff --git a/datafusion/physical-expr/src/lib.rs
b/datafusion/physical-expr/src/lib.rs
index 07bccf25c8..6d5d7e85c6 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -20,8 +20,6 @@ pub mod analysis;
pub mod array_expressions;
pub mod binary_map;
pub mod conditional_expressions;
-#[cfg(feature = "crypto_expressions")]
-pub mod crypto_expressions;
pub mod datetime_expressions;
pub mod equivalence;
pub mod expressions;
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index e6ee41fadb..b4cab05a28 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -548,7 +548,7 @@ enum ScalarFunction {
Ascii = 4;
Ceil = 5;
Cos = 6;
- Digest = 7;
+ // 7 was Digest
Exp = 8;
Floor = 9;
Ln = 10;
@@ -576,7 +576,7 @@ enum ScalarFunction {
Lpad = 32;
Lower = 33;
Ltrim = 34;
- MD5 = 35;
+ // 35 was MD5
// 36 was NullIf
OctetLength = 37;
Random = 38;
@@ -587,10 +587,10 @@ enum ScalarFunction {
Right = 43;
Rpad = 44;
Rtrim = 45;
- SHA224 = 46;
- SHA256 = 47;
- SHA384 = 48;
- SHA512 = 49;
+ // 46 was SHA224
+ // 47 was SHA256
+ // 48 was SHA384
+ // 49 was SHA512
SplitPart = 50;
StartsWith = 51;
Strpos = 52;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index be26ccee18..7d565e4810 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -23116,7 +23116,6 @@ impl serde::Serialize for ScalarFunction {
Self::Ascii => "Ascii",
Self::Ceil => "Ceil",
Self::Cos => "Cos",
- Self::Digest => "Digest",
Self::Exp => "Exp",
Self::Floor => "Floor",
Self::Ln => "Ln",
@@ -23139,7 +23138,6 @@ impl serde::Serialize for ScalarFunction {
Self::Lpad => "Lpad",
Self::Lower => "Lower",
Self::Ltrim => "Ltrim",
- Self::Md5 => "MD5",
Self::OctetLength => "OctetLength",
Self::Random => "Random",
Self::Repeat => "Repeat",
@@ -23148,10 +23146,6 @@ impl serde::Serialize for ScalarFunction {
Self::Right => "Right",
Self::Rpad => "Rpad",
Self::Rtrim => "Rtrim",
- Self::Sha224 => "SHA224",
- Self::Sha256 => "SHA256",
- Self::Sha384 => "SHA384",
- Self::Sha512 => "SHA512",
Self::SplitPart => "SplitPart",
Self::StartsWith => "StartsWith",
Self::Strpos => "Strpos",
@@ -23219,7 +23213,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Ascii",
"Ceil",
"Cos",
- "Digest",
"Exp",
"Floor",
"Ln",
@@ -23242,7 +23235,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Lpad",
"Lower",
"Ltrim",
- "MD5",
"OctetLength",
"Random",
"Repeat",
@@ -23251,10 +23243,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Right",
"Rpad",
"Rtrim",
- "SHA224",
- "SHA256",
- "SHA384",
- "SHA512",
"SplitPart",
"StartsWith",
"Strpos",
@@ -23351,7 +23339,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Ascii" => Ok(ScalarFunction::Ascii),
"Ceil" => Ok(ScalarFunction::Ceil),
"Cos" => Ok(ScalarFunction::Cos),
- "Digest" => Ok(ScalarFunction::Digest),
"Exp" => Ok(ScalarFunction::Exp),
"Floor" => Ok(ScalarFunction::Floor),
"Ln" => Ok(ScalarFunction::Ln),
@@ -23374,7 +23361,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Lpad" => Ok(ScalarFunction::Lpad),
"Lower" => Ok(ScalarFunction::Lower),
"Ltrim" => Ok(ScalarFunction::Ltrim),
- "MD5" => Ok(ScalarFunction::Md5),
"OctetLength" => Ok(ScalarFunction::OctetLength),
"Random" => Ok(ScalarFunction::Random),
"Repeat" => Ok(ScalarFunction::Repeat),
@@ -23383,10 +23369,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Right" => Ok(ScalarFunction::Right),
"Rpad" => Ok(ScalarFunction::Rpad),
"Rtrim" => Ok(ScalarFunction::Rtrim),
- "SHA224" => Ok(ScalarFunction::Sha224),
- "SHA256" => Ok(ScalarFunction::Sha256),
- "SHA384" => Ok(ScalarFunction::Sha384),
- "SHA512" => Ok(ScalarFunction::Sha512),
"SplitPart" => Ok(ScalarFunction::SplitPart),
"StartsWith" => Ok(ScalarFunction::StartsWith),
"Strpos" => Ok(ScalarFunction::Strpos),
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 54d3bffae1..80f529196f 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2850,7 +2850,7 @@ pub enum ScalarFunction {
Ascii = 4,
Ceil = 5,
Cos = 6,
- Digest = 7,
+ /// 7 was Digest
Exp = 8,
Floor = 9,
Ln = 10,
@@ -2878,7 +2878,7 @@ pub enum ScalarFunction {
Lpad = 32,
Lower = 33,
Ltrim = 34,
- Md5 = 35,
+ /// 35 was MD5
/// 36 was NullIf
OctetLength = 37,
Random = 38,
@@ -2889,10 +2889,10 @@ pub enum ScalarFunction {
Right = 43,
Rpad = 44,
Rtrim = 45,
- Sha224 = 46,
- Sha256 = 47,
- Sha384 = 48,
- Sha512 = 49,
+ /// 46 was SHA224
+ /// 47 was SHA256
+ /// 48 was SHA384
+ /// 49 was SHA512
SplitPart = 50,
StartsWith = 51,
Strpos = 52,
@@ -2994,7 +2994,6 @@ impl ScalarFunction {
ScalarFunction::Ascii => "Ascii",
ScalarFunction::Ceil => "Ceil",
ScalarFunction::Cos => "Cos",
- ScalarFunction::Digest => "Digest",
ScalarFunction::Exp => "Exp",
ScalarFunction::Floor => "Floor",
ScalarFunction::Ln => "Ln",
@@ -3017,7 +3016,6 @@ impl ScalarFunction {
ScalarFunction::Lpad => "Lpad",
ScalarFunction::Lower => "Lower",
ScalarFunction::Ltrim => "Ltrim",
- ScalarFunction::Md5 => "MD5",
ScalarFunction::OctetLength => "OctetLength",
ScalarFunction::Random => "Random",
ScalarFunction::Repeat => "Repeat",
@@ -3026,10 +3024,6 @@ impl ScalarFunction {
ScalarFunction::Right => "Right",
ScalarFunction::Rpad => "Rpad",
ScalarFunction::Rtrim => "Rtrim",
- ScalarFunction::Sha224 => "SHA224",
- ScalarFunction::Sha256 => "SHA256",
- ScalarFunction::Sha384 => "SHA384",
- ScalarFunction::Sha512 => "SHA512",
ScalarFunction::SplitPart => "SplitPart",
ScalarFunction::StartsWith => "StartsWith",
ScalarFunction::Strpos => "Strpos",
@@ -3091,7 +3085,6 @@ impl ScalarFunction {
"Ascii" => Some(Self::Ascii),
"Ceil" => Some(Self::Ceil),
"Cos" => Some(Self::Cos),
- "Digest" => Some(Self::Digest),
"Exp" => Some(Self::Exp),
"Floor" => Some(Self::Floor),
"Ln" => Some(Self::Ln),
@@ -3114,7 +3107,6 @@ impl ScalarFunction {
"Lpad" => Some(Self::Lpad),
"Lower" => Some(Self::Lower),
"Ltrim" => Some(Self::Ltrim),
- "MD5" => Some(Self::Md5),
"OctetLength" => Some(Self::OctetLength),
"Random" => Some(Self::Random),
"Repeat" => Some(Self::Repeat),
@@ -3123,10 +3115,6 @@ impl ScalarFunction {
"Right" => Some(Self::Right),
"Rpad" => Some(Self::Rpad),
"Rtrim" => Some(Self::Rtrim),
- "SHA224" => Some(Self::Sha224),
- "SHA256" => Some(Self::Sha256),
- "SHA384" => Some(Self::Sha384),
- "SHA512" => Some(Self::Sha512),
"SplitPart" => Some(Self::SplitPart),
"StartsWith" => Some(Self::StartsWith),
"Strpos" => Some(Self::Strpos),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index fb7e82d51c..b229c1c699 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -52,17 +52,17 @@ use datafusion_expr::{
array_replace, array_replace_all, array_replace_n, array_resize,
array_slice,
array_union, ascii, asinh, atan, atan2, atanh, bit_length, btrim, cbrt,
ceil,
character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh,
cot,
- degrees, digest, ends_with, exp,
+ degrees, ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, floor, gcd, initcap, iszero, lcm, left,
levenshtein, ln, log,
log10, log2,
logical_plan::{PlanType, StringifiedPlan},
- lower, lpad, ltrim, md5, nanvl, octet_length, overlay, pi, power, radians,
random,
- repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256,
sha384, sha512,
- signum, sin, sinh, split_part, sqrt, starts_with, strpos, substr,
substr_index,
- substring, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction,
Between,
- BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr,
- GetFieldAccess, GetIndexedField, GroupingSet,
+ lower, lpad, ltrim, nanvl, octet_length, overlay, pi, power, radians,
random, repeat,
+ replace, reverse, right, round, rpad, rtrim, signum, sin, sinh,
split_part, sqrt,
+ starts_with, strpos, substr, substr_index, substring, to_hex, translate,
trim, trunc,
+ upper, uuid, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction,
+ BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField,
+ GroupingSet,
GroupingSet::GroupingSets,
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame,
WindowFrameBound,
WindowFrameUnits,
@@ -489,12 +489,6 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
ScalarFunction::ArrayUnion => Self::ArrayUnion,
ScalarFunction::ArrayResize => Self::ArrayResize,
- ScalarFunction::Md5 => Self::MD5,
- ScalarFunction::Sha224 => Self::SHA224,
- ScalarFunction::Sha256 => Self::SHA256,
- ScalarFunction::Sha384 => Self::SHA384,
- ScalarFunction::Sha512 => Self::SHA512,
- ScalarFunction::Digest => Self::Digest,
ScalarFunction::Log2 => Self::Log2,
ScalarFunction::Signum => Self::Signum,
ScalarFunction::Ascii => Self::Ascii,
@@ -1524,23 +1518,6 @@ pub fn parse_expr(
ScalarFunction::Rtrim => {
Ok(rtrim(parse_expr(&args[0], registry, codec)?))
}
- ScalarFunction::Sha224 => {
- Ok(sha224(parse_expr(&args[0], registry, codec)?))
- }
- ScalarFunction::Sha256 => {
- Ok(sha256(parse_expr(&args[0], registry, codec)?))
- }
- ScalarFunction::Sha384 => {
- Ok(sha384(parse_expr(&args[0], registry, codec)?))
- }
- ScalarFunction::Sha512 => {
- Ok(sha512(parse_expr(&args[0], registry, codec)?))
- }
- ScalarFunction::Md5 => Ok(md5(parse_expr(&args[0], registry,
codec)?)),
- ScalarFunction::Digest => Ok(digest(
- parse_expr(&args[0], registry, codec)?,
- parse_expr(&args[1], registry, codec)?,
- )),
ScalarFunction::Ascii => {
Ok(ascii(parse_expr(&args[0], registry, codec)?))
}
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 65b4c8ba04..40f195c9f1 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1471,12 +1471,6 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion,
- BuiltinScalarFunction::MD5 => Self::Md5,
- BuiltinScalarFunction::SHA224 => Self::Sha224,
- BuiltinScalarFunction::SHA256 => Self::Sha256,
- BuiltinScalarFunction::SHA384 => Self::Sha384,
- BuiltinScalarFunction::SHA512 => Self::Sha512,
- BuiltinScalarFunction::Digest => Self::Digest,
BuiltinScalarFunction::Log2 => Self::Log2,
BuiltinScalarFunction::Signum => Self::Signum,
BuiltinScalarFunction::Ascii => Self::Ascii,