This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new f38443d  [python] add digest python function (#1127)
f38443d is described below

commit f38443d2338ea6953e3ce3178e937173bb53df94
Author: Jiayu Liu <[email protected]>
AuthorDate: Sun Oct 17 23:15:19 2021 +0800

    [python] add digest python function (#1127)
    
    * add digest python function
    
    * add test result
    
    * ignore long lines
---
 .github/workflows/python_test.yaml    |  2 +-
 python/src/functions.rs               | 13 +++++++
 python/tests/test_string_functions.py | 72 +++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml
index 5419adb..17e7abb 100644
--- a/.github/workflows/python_test.yaml
+++ b/.github/workflows/python_test.yaml
@@ -49,7 +49,7 @@ jobs:
       - name: Run Linters
         run: |
           source venv/bin/activate
-          flake8 python
+          flake8 python --ignore=E501
           black --line-length 79 --check python
       - name: Run tests
         run: |
diff --git a/python/src/functions.rs b/python/src/functions.rs
index 8611ca5..cecf28d 100644
--- a/python/src/functions.rs
+++ b/python/src/functions.rs
@@ -93,6 +93,18 @@ fn random() -> expression::Expression {
     }
 }
 
+/// Computes a binary hash of the given data. `method` is the algorithm to use.
+/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3.
+#[pyfunction(value, method)]
+fn digest(
+    value: expression::Expression,
+    method: expression::Expression,
+) -> expression::Expression {
+    expression::Expression {
+        expr: logical_plan::digest(value.expr, method.expr),
+    }
+}
+
 /// Concatenates the text representations of all the arguments.
 /// NULL arguments are ignored.
 #[pyfunction(args = "*")]
@@ -340,6 +352,7 @@ pub fn init(module: &PyModule) -> PyResult<()> {
     module.add_function(wrap_pyfunction!(ltrim, module)?)?;
     module.add_function(wrap_pyfunction!(max, module)?)?;
     module.add_function(wrap_pyfunction!(md5, module)?)?;
+    module.add_function(wrap_pyfunction!(digest, module)?)?;
     module.add_function(wrap_pyfunction!(min, module)?)?;
     module.add_function(wrap_pyfunction!(now, module)?)?;
     module.add_function(wrap_pyfunction!(octet_length, module)?)?;
diff --git a/python/tests/test_string_functions.py b/python/tests/test_string_functions.py
index ea064a6..965f087 100644
--- a/python/tests/test_string_functions.py
+++ b/python/tests/test_string_functions.py
@@ -47,3 +47,75 @@ def test_string_functions(df):
         ]
     )
     assert result.column(1) == pa.array(["hello", "world", "!"])
+
+
+def test_hash_functions(df):
+    df = df.select(
+        *[
+            f.digest(f.col("a"), f.lit(m))
+            for m in ("md5", "sha256", "sha512", "blake2s", "blake3")
+        ]
+    )
+    result = df.collect()
+    assert len(result) == 1
+    result = result[0]
+    b = bytearray.fromhex
+    assert result.column(0) == pa.array(
+        [
+            b("8B1A9953C4611296A827ABF8C47804D7"),
+            b("F5A7924E621E84C9280A9A27E1BCB7F6"),
+            b("9033E0E305F247C0C3C80D0C7848C8B3"),
+        ]
+    )
+    assert result.column(1) == pa.array(
+        [
+            b(
+                
"185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"
+            ),
+            b(
+                
"78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524"
+            ),
+            b(
+                
"BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"
+            ),
+        ]
+    )
+    assert result.column(2) == pa.array(
+        [
+            b(
+                
"3615F80C9D293ED7402687F94B22D58E529B8CC7916F8FAC7FDDF7FBD5AF4CF777D3D795A7A00A16BF7E7F3FB9561EE9BAAE480DA9FE7A18769E71886B03F315"
+            ),
+            b(
+                
"8EA77393A42AB8FA92500FB077A9509CC32BC95E72712EFA116EDAF2EDFAE34FBB682EFDD6C5DD13C117E08BD4AAEF71291D8AACE2F890273081D0677C16DF0F"
+            ),
+            b(
+                
"3831A6A6155E509DEE59A7F451EB35324D8F8F2DF6E3708894740F98FDEE23889F4DE5ADB0C5010DFB555CDA77C8AB5DC902094C52DE3278F35A75EBC25F093A"
+            ),
+        ]
+    )
+    assert result.column(3) == pa.array(
+        [
+            b(
+                
"F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71"
+            ),
+            b(
+                
"B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5"
+            ),
+            b(
+                
"27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB"
+            ),
+        ]
+    )
+    assert result.column(4) == pa.array(
+        [
+            b(
+                
"FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F"
+            ),
+            b(
+                
"BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E"
+            ),
+            b(
+                
"C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB"
+            ),
+        ]
+    )

Reply via email to