This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d5d9d30ee6 Crypto Function Migration (#12840)
d5d9d30ee6 is described below
commit d5d9d30ee6637f1de8ec7b8d062c75f89a140c64
Author: Jonathan Chen <[email protected]>
AuthorDate: Thu Oct 10 16:49:56 2024 -0400
Crypto Function Migration (#12840)
* Update crypto docs
* delete old
* fmt checks
---
datafusion/functions/src/crypto/digest.rs | 48 ++++++-
datafusion/functions/src/crypto/md5.rs | 34 ++++-
datafusion/functions/src/crypto/sha224.rs | 10 ++
datafusion/functions/src/crypto/sha256.rs | 35 +++++-
datafusion/functions/src/crypto/sha384.rs | 35 +++++-
datafusion/functions/src/crypto/sha512.rs | 35 +++++-
docs/source/user-guide/sql/scalar_functions.md | 84 -------------
docs/source/user-guide/sql/scalar_functions_new.md | 140 +++++++++++++++++++++
8 files changed, 332 insertions(+), 89 deletions(-)
diff --git a/datafusion/functions/src/crypto/digest.rs
b/datafusion/functions/src/crypto/digest.rs
index c9dd3c1f56..9ec07b1cab 100644
--- a/datafusion/functions/src/crypto/digest.rs
+++ b/datafusion/functions/src/crypto/digest.rs
@@ -19,10 +19,12 @@
use super::basic::{digest, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
- ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*,
Volatility,
};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct DigestFunc {
@@ -69,4 +71,48 @@ impl ScalarUDFImpl for DigestFunc {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
digest(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_digest_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_digest_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_HASHING)
+ .with_description(
+ "Computes the binary hash of an expression using the specified
algorithm.",
+ )
+ .with_syntax_example("digest(expression, algorithm)")
+ .with_sql_example(
+ r#"```sql
+> select digest('foo', 'sha256');
++------------------------------------------+
+| digest(Utf8("foo"), Utf8("sha256")) |
++------------------------------------------+
+| <binary_hash_result> |
++------------------------------------------+
+```"#,
+ )
+ .with_standard_argument(
+ "expression", "String")
+ .with_argument(
+ "algorithm",
+ "String expression specifying algorithm to use. Must be one of:
+
+- md5
+- sha224
+- sha256
+- sha384
+- sha512
+- blake2s
+- blake2b
+- blake3",
+ )
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/crypto/md5.rs
b/datafusion/functions/src/crypto/md5.rs
index ccb6fbba80..f273c9d28c 100644
--- a/datafusion/functions/src/crypto/md5.rs
+++ b/datafusion/functions/src/crypto/md5.rs
@@ -19,8 +19,12 @@
use crate::crypto::basic::md5;
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct Md5Func {
@@ -84,4 +88,32 @@ impl ScalarUDFImpl for Md5Func {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
md5(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_md5_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_md5_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_HASHING)
+ .with_description("Computes an MD5 128-bit checksum for a string
expression.")
+ .with_syntax_example("md5(expression)")
+ .with_sql_example(
+ r#"```sql
+> select md5('foo');
++-------------------------------------+
+| md5(Utf8("foo")) |
++-------------------------------------+
+| <md5_checksum_result> |
++-------------------------------------+
+```"#,
+ )
+ .with_standard_argument("expression", "String")
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/crypto/sha224.rs
b/datafusion/functions/src/crypto/sha224.rs
index d603e5bcf2..868c8cdc35 100644
--- a/datafusion/functions/src/crypto/sha224.rs
+++ b/datafusion/functions/src/crypto/sha224.rs
@@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation {
.with_doc_section(DOC_SECTION_HASHING)
.with_description("Computes the SHA-224 hash of a binary string.")
.with_syntax_example("sha224(expression)")
+ .with_sql_example(
+ r#"```sql
+> select sha224('foo');
++------------------------------------------+
+| sha224(Utf8("foo")) |
++------------------------------------------+
+| <sha224_hash_result> |
++------------------------------------------+
+```"#,
+ )
.with_standard_argument("expression", "String")
.build()
.unwrap()
diff --git a/datafusion/functions/src/crypto/sha256.rs
b/datafusion/functions/src/crypto/sha256.rs
index 0a3f3b26e4..99a470efbc 100644
--- a/datafusion/functions/src/crypto/sha256.rs
+++ b/datafusion/functions/src/crypto/sha256.rs
@@ -19,8 +19,12 @@
use super::basic::{sha256, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct SHA256Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA256Func {
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}
+
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha256(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_sha256_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_sha256_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_HASHING)
+ .with_description("Computes the SHA-256 hash of a binary string.")
+ .with_syntax_example("sha256(expression)")
+ .with_sql_example(
+ r#"```sql
+> select sha256('foo');
++--------------------------------------+
+| sha256(Utf8("foo")) |
++--------------------------------------+
+| <sha256_hash_result> |
++--------------------------------------+
+```"#,
+ )
+ .with_standard_argument("expression", "String")
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/crypto/sha384.rs
b/datafusion/functions/src/crypto/sha384.rs
index c3f7845ce7..afe2db7478 100644
--- a/datafusion/functions/src/crypto/sha384.rs
+++ b/datafusion/functions/src/crypto/sha384.rs
@@ -19,8 +19,12 @@
use super::basic::{sha384, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct SHA384Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA384Func {
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}
+
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha384(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_sha384_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_sha384_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_HASHING)
+ .with_description("Computes the SHA-384 hash of a binary string.")
+ .with_syntax_example("sha384(expression)")
+ .with_sql_example(
+ r#"```sql
+> select sha384('foo');
++-----------------------------------------+
+| sha384(Utf8("foo")) |
++-----------------------------------------+
+| <sha384_hash_result> |
++-----------------------------------------+
+```"#,
+ )
+ .with_standard_argument("expression", "String")
+ .build()
+ .unwrap()
+ })
}
diff --git a/datafusion/functions/src/crypto/sha512.rs
b/datafusion/functions/src/crypto/sha512.rs
index dc3bfac9d8..c88579fd08 100644
--- a/datafusion/functions/src/crypto/sha512.rs
+++ b/datafusion/functions/src/crypto/sha512.rs
@@ -19,8 +19,12 @@
use super::basic::{sha512, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
use std::any::Any;
+use std::sync::OnceLock;
#[derive(Debug)]
pub struct SHA512Func {
@@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA512Func {
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}
+
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha512(args)
}
+
+ fn documentation(&self) -> Option<&Documentation> {
+ Some(get_sha512_doc())
+ }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+/// Returns the documentation for `sha512`, built once on first use and cached in `DOCUMENTATION`.
+fn get_sha512_doc() -> &'static Documentation {
+ DOCUMENTATION.get_or_init(|| {
+ Documentation::builder()
+ .with_doc_section(DOC_SECTION_HASHING)
+ .with_description("Computes the SHA-512 hash of a binary string.")
+ .with_syntax_example("sha512(expression)")
+ .with_sql_example(
+ r#"```sql
+> select sha512('foo');
++-------------------------------------------+
+| sha512(Utf8("foo")) |
++-------------------------------------------+
+| <sha512_hash_result> |
++-------------------------------------------+
+```"#,
+ )
+ .with_standard_argument("expression", "String")
+ .build()
+ .unwrap()
+ })
}
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index f4c5163f49..4807673890 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -3066,90 +3066,6 @@ select map_values(map([100, 5], [42,43]));
[42, 43]
```
-## Hashing Functions
-
-- [digest](#digest)
-- [md5](#md5)
-- [sha256](#sha256)
-- [sha384](#sha384)
-- [sha512](#sha512)
-
-### `digest`
-
-Computes the binary hash of an expression using the specified algorithm.
-
-```
-digest(expression, algorithm)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-- **algorithm**: String expression specifying algorithm to use.
- Must be one of:
-
- - md5
- - sha224
- - sha256
- - sha384
- - sha512
- - blake2s
- - blake2b
- - blake3
-
-### `md5`
-
-Computes an MD5 128-bit checksum for a string expression.
-
-```
-md5(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `sha256`
-
-Computes the SHA-256 hash of a binary string.
-
-```
-sha256(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `sha384`
-
-Computes the SHA-384 hash of a binary string.
-
-```
-sha384(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
-### `sha512`
-
-Computes the SHA-512 hash of a binary string.
-
-```
-sha512(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
- Can be a constant, column, or function, and any combination of string
operators.
-
## Other Functions
- [arrow_cast](#arrow_cast)
diff --git a/docs/source/user-guide/sql/scalar_functions_new.md
b/docs/source/user-guide/sql/scalar_functions_new.md
index 1b6b0ffd59..673c55f46b 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -1161,7 +1161,67 @@ Additional examples can be found
[here](https://github.com/apache/datafusion/blo
## Hashing Functions
+- [digest](#digest)
+- [md5](#md5)
- [sha224](#sha224)
+- [sha256](#sha256)
+- [sha384](#sha384)
+- [sha512](#sha512)
+
+### `digest`
+
+Computes the binary hash of an expression using the specified algorithm.
+
+```
+digest(expression, algorithm)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+- **algorithm**: String expression specifying algorithm to use. Must be one of:
+- md5
+- sha224
+- sha256
+- sha384
+- sha512
+- blake2s
+- blake2b
+- blake3
+
+#### Example
+
+```sql
+> select digest('foo', 'sha256');
++------------------------------------------+
+| digest(Utf8("foo"), Utf8("sha256")) |
++------------------------------------------+
+| <binary_hash_result> |
++------------------------------------------+
+```
+
+### `md5`
+
+Computes an MD5 128-bit checksum for a string expression.
+
+```
+md5(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select md5('foo');
++-------------------------------------+
+| md5(Utf8("foo")) |
++-------------------------------------+
+| <md5_checksum_result> |
++-------------------------------------+
+```
### `sha224`
@@ -1174,3 +1234,83 @@ sha224(expression)
#### Arguments
- **expression**: String expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha224('foo');
++------------------------------------------+
+| sha224(Utf8("foo")) |
++------------------------------------------+
+| <sha224_hash_result> |
++------------------------------------------+
+```
+
+### `sha256`
+
+Computes the SHA-256 hash of a binary string.
+
+```
+sha256(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha256('foo');
++--------------------------------------+
+| sha256(Utf8("foo")) |
++--------------------------------------+
+| <sha256_hash_result> |
++--------------------------------------+
+```
+
+### `sha384`
+
+Computes the SHA-384 hash of a binary string.
+
+```
+sha384(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column,
or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha384('foo');
++-----------------------------------------+
+| sha384(Utf8("foo")) |
++-----------------------------------------+
+| <sha384_hash_result> |
++-----------------------------------------+
+```
+
+### `sha512`
+
+Computes the SHA-512 hash of a binary string.
+
+```
+sha512(expression)
+```
+
+#### Arguments
+
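+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.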
+
+#### Example
+
+```sql
+> select sha512('foo');
++-------------------------------------------+
+| sha512(Utf8("foo")) |
++-------------------------------------------+
+| <sha512_hash_result> |
++-------------------------------------------+
+```
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]