This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9556da8834b0 [SPARK-46673][PYTHON][DOCS] Refine docstring
`aes_encrypt/aes_decrypt/try_aes_decrypt`
9556da8834b0 is described below
commit 9556da8834b0b6ef6d4237a46a62cadd839c88e7
Author: panbingkun <[email protected]>
AuthorDate: Mon Jan 22 11:18:40 2024 +0300
[SPARK-46673][PYTHON][DOCS] Refine docstring
`aes_encrypt/aes_decrypt/try_aes_decrypt`
### What changes were proposed in this pull request?
This PR refines the docstrings of
`aes_encrypt/aes_decrypt/try_aes_decrypt`.
### Why are the changes needed?
To improve PySpark documentation.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Passed GitHub Actions (GA).
- Manually tested.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44750 from panbingkun/SPARK-46673.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 246 ++++++++++++++++++++++++++------
1 file changed, 201 insertions(+), 45 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index ca2efde0b3c2..d3a94fe4b9e9 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -18836,6 +18836,8 @@ def nvl2(col1: "ColumnOrName", col2: "ColumnOrName",
col3: "ColumnOrName") -> Co
return _invoke_function_over_columns("nvl2", col1, col2, col3)
+# TODO(SPARK-46738) Re-enable testing that includes the 'Cast' operation after
+# fixing the display difference between Regular Spark and Spark Connect on `Cast`.
@_try_remote_functions
def aes_encrypt(
input: "ColumnOrName",
@@ -18877,50 +18879,96 @@ def aes_encrypt(
Optional additional authenticated data. Only supported for GCM mode.
This can be any
free-form input and must be provided for both encryption and
decryption.
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ A new column that contains an encrypted value.
+
Examples
--------
+
+ Example 1: Encrypt data with key, mode, padding, iv and aad.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "Spark", "abcdefghijklmnop12345678ABCDEFGH", "GCM", "DEFAULT",
... "000000000000000000000000", "This is an AAD mixed into the
input",)],
... ["input", "key", "mode", "padding", "iv", "aad"]
... )
- >>> df.select(base64(aes_encrypt(
- ... df.input, df.key, df.mode, df.padding, to_binary(df.iv,
lit("hex")), df.aad)
- ... ).alias('r')).collect()
- [Row(r='AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4')]
+ >>> df.select(sf.base64(sf.aes_encrypt(
+ ... df.input, df.key, df.mode, df.padding, sf.to_binary(df.iv,
sf.lit("hex")), df.aad)
+ ... )).show(truncate=False)
+ +-----------------------------------------------------------------------+
+ |base64(aes_encrypt(input, key, mode, padding, to_binary(iv, hex), aad))|
+ +-----------------------------------------------------------------------+
+ |AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4 |
+ +-----------------------------------------------------------------------+
- >>> df.select(base64(aes_encrypt(
- ... df.input, df.key, df.mode, df.padding, to_binary(df.iv,
lit("hex")))
- ... ).alias('r')).collect()
- [Row(r='AAAAAAAAAAAAAAAAQiYi+sRNYDAOTjdSEcYBFsAWPL1f')]
+ Example 2: Encrypt data with key, mode, padding and iv.
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(
+ ... "Spark", "abcdefghijklmnop12345678ABCDEFGH", "GCM", "DEFAULT",
+ ... "000000000000000000000000", "This is an AAD mixed into the
input",)],
+ ... ["input", "key", "mode", "padding", "iv", "aad"]
+ ... )
+ >>> df.select(sf.base64(sf.aes_encrypt(
+ ... df.input, df.key, df.mode, df.padding, sf.to_binary(df.iv,
sf.lit("hex")))
+ ... )).show(truncate=False)
+ +--------------------------------------------------------------------+
+ |base64(aes_encrypt(input, key, mode, padding, to_binary(iv, hex), ))|
+ +--------------------------------------------------------------------+
+ |AAAAAAAAAAAAAAAAQiYi+sRNYDAOTjdSEcYBFsAWPL1f |
+ +--------------------------------------------------------------------+
+
+ Example 3: Encrypt data with key, mode and padding.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "Spark SQL", "1234567890abcdef", "ECB", "PKCS",)],
... ["input", "key", "mode", "padding"]
... )
- >>> df.select(aes_decrypt(aes_encrypt(df.input, df.key, df.mode,
df.padding),
- ... df.key, df.mode, df.padding).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark SQL'))]
+ >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, df.mode,
df.padding),
+ ... df.key, df.mode, df.padding
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +---------------------------------------------------------------------------------------------+
+ |CAST(aes_decrypt(aes_encrypt(input, key, mode, padding, , ), key, mode, padding, ) AS STRING)|
+ +---------------------------------------------------------------------------------------------+
+ |Spark SQL                                                                                    |
+ +---------------------------------------------------------------------------------------------+
+ Example 4: Encrypt data with key and mode.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "Spark SQL", "0000111122223333", "ECB",)],
... ["input", "key", "mode"]
... )
- >>> df.select(aes_decrypt(aes_encrypt(df.input, df.key, df.mode),
- ... df.key, df.mode).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark SQL'))]
+ >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, df.mode),
+ ... df.key, df.mode
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +---------------------------------------------------------------------------------------------+
+ |CAST(aes_decrypt(aes_encrypt(input, key, mode, DEFAULT, , ), key, mode, DEFAULT, ) AS STRING)|
+ +---------------------------------------------------------------------------------------------+
+ |Spark SQL                                                                                    |
+ +---------------------------------------------------------------------------------------------+
+
+ Example 5: Encrypt data with key.
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "Spark SQL", "abcdefghijklmnop",)],
... ["input", "key"]
... )
- >>> df.select(aes_decrypt(
- ... unbase64(base64(aes_encrypt(df.input, df.key))), df.key
- ... ).cast("STRING").alias('r')).collect()
- [Row(r='Spark SQL')]
- """
+ >>> df.select(sf.aes_decrypt(
+ ... sf.unbase64(sf.base64(sf.aes_encrypt(df.input, df.key))), df.key
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +-------------------------------------------------------------------------------------------------------------+
+ |CAST(aes_decrypt(unbase64(base64(aes_encrypt(input, key, GCM, DEFAULT, , ))), key, GCM, DEFAULT, ) AS STRING)|
+ +-------------------------------------------------------------------------------------------------------------+
+ |Spark SQL                                                                                                    |
+ +-------------------------------------------------------------------------------------------------------------+
+ """ # noqa: E501
_mode = lit("GCM") if mode is None else mode
_padding = lit("DEFAULT") if padding is None else padding
_iv = lit("") if iv is None else iv
@@ -18928,6 +18976,8 @@ def aes_encrypt(
return _invoke_function_over_columns("aes_encrypt", input, key, _mode,
_padding, _iv, _aad)
+# TODO(SPARK-46738) Re-enable testing that includes the 'Cast' operation after
+# fixing the display difference between Regular Spark and Spark Connect on `Cast`.
@_try_remote_functions
def aes_decrypt(
input: "ColumnOrName",
@@ -18962,39 +19012,82 @@ def aes_decrypt(
Optional additional authenticated data. Only supported for GCM mode.
This can be any
free-form input and must be provided for both encryption and
decryption.
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ A new column that contains a decrypted value.
+
Examples
--------
+
+ Example 1: Decrypt data with key, mode, padding and aad.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4",
... "abcdefghijklmnop12345678ABCDEFGH", "GCM", "DEFAULT",
... "This is an AAD mixed into the input",)],
... ["input", "key", "mode", "padding", "aad"]
... )
- >>> df.select(aes_decrypt(
- ... unbase64(df.input), df.key, df.mode, df.padding, df.aad).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +---------------------------------------------------------------------+
+ |CAST(aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)|
+ +---------------------------------------------------------------------+
+ |Spark |
+ +---------------------------------------------------------------------+
+ Example 2: Decrypt data with key, mode and padding.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=",
... "abcdefghijklmnop12345678ABCDEFGH", "CBC", "DEFAULT",)],
... ["input", "key", "mode", "padding"]
... )
- >>> df.select(aes_decrypt(
- ... unbase64(df.input), df.key, df.mode, df.padding).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode, df.padding
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +------------------------------------------------------------------+
+ |CAST(aes_decrypt(unbase64(input), key, mode, padding, ) AS STRING)|
+ +------------------------------------------------------------------+
+ |Spark |
+ +------------------------------------------------------------------+
+
+ Example 3: Decrypt data with key and mode.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(
+ ... "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=",
+ ... "abcdefghijklmnop12345678ABCDEFGH", "CBC", "DEFAULT",)],
+ ... ["input", "key", "mode", "padding"]
+ ... )
+ >>> df.select(sf.aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +------------------------------------------------------------------+
+ |CAST(aes_decrypt(unbase64(input), key, mode, DEFAULT, ) AS STRING)|
+ +------------------------------------------------------------------+
+ |Spark |
+ +------------------------------------------------------------------+
- >>> df.select(aes_decrypt(unbase64(df.input), df.key,
df.mode).alias('r')).collect()
- [Row(r=bytearray(b'Spark'))]
+ Example 4: Decrypt data with key.
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
...
"83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94",
... "0000111122223333",)],
... ["input", "key"]
... )
- >>> df.select(aes_decrypt(unhex(df.input), df.key).alias('r')).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.aes_decrypt(
+ ... sf.unhex(df.input), df.key
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +--------------------------------------------------------------+
+ |CAST(aes_decrypt(unhex(input), key, GCM, DEFAULT, ) AS STRING)|
+ +--------------------------------------------------------------+
+ |Spark |
+ +--------------------------------------------------------------+
"""
_mode = lit("GCM") if mode is None else mode
_padding = lit("DEFAULT") if padding is None else padding
@@ -19002,6 +19095,8 @@ def aes_decrypt(
return _invoke_function_over_columns("aes_decrypt", input, key, _mode,
_padding, _aad)
+# TODO(SPARK-46738) Re-enable testing that includes the 'Cast' operation after
+# fixing the display difference between Regular Spark and Spark Connect on `Cast`.
@_try_remote_functions
def try_aes_decrypt(
input: "ColumnOrName",
@@ -19038,39 +19133,100 @@ def try_aes_decrypt(
Optional additional authenticated data. Only supported for GCM mode.
This can be any
free-form input and must be provided for both encryption and
decryption.
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ A new column that contains a decrypted value or a NULL value.
+
Examples
--------
+
+ Example 1: Decrypt data with key, mode, padding and aad.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4",
... "abcdefghijklmnop12345678ABCDEFGH", "GCM", "DEFAULT",
... "This is an AAD mixed into the input",)],
... ["input", "key", "mode", "padding", "aad"]
... )
- >>> df.select(try_aes_decrypt(
- ... unbase64(df.input), df.key, df.mode, df.padding, df.aad).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.try_aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +-------------------------------------------------------------------------+
+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)|
+ +-------------------------------------------------------------------------+
+ |Spark |
+ +-------------------------------------------------------------------------+
+ Example 2: Failed to decrypt data with key, mode, padding and aad.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(
+ ... "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4",
+ ... "abcdefghijklmnop12345678ABCDEFGH", "CBC", "DEFAULT",
+ ... "This is an AAD mixed into the input",)],
+ ... ["input", "key", "mode", "padding", "aad"]
+ ... )
+ >>> df.select(sf.try_aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +-------------------------------------------------------------------------+
+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)|
+ +-------------------------------------------------------------------------+
+ |NULL |
+ +-------------------------------------------------------------------------+
+
+ Example 3: Decrypt data with key, mode and padding.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
... "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=",
... "abcdefghijklmnop12345678ABCDEFGH", "CBC", "DEFAULT",)],
... ["input", "key", "mode", "padding"]
... )
- >>> df.select(try_aes_decrypt(
- ... unbase64(df.input), df.key, df.mode, df.padding).alias('r')
- ... ).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.try_aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode, df.padding
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +----------------------------------------------------------------------+
+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, ) AS STRING)|
+ +----------------------------------------------------------------------+
+ |Spark |
+ +----------------------------------------------------------------------+
- >>> df.select(try_aes_decrypt(unbase64(df.input), df.key,
df.mode).alias('r')).collect()
- [Row(r=bytearray(b'Spark'))]
+ Example 4: Decrypt data with key and mode.
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([(
+ ... "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=",
+ ... "abcdefghijklmnop12345678ABCDEFGH", "CBC", "DEFAULT",)],
+ ... ["input", "key", "mode", "padding"]
+ ... )
+ >>> df.select(sf.try_aes_decrypt(
+ ... sf.unbase64(df.input), df.key, df.mode
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +----------------------------------------------------------------------+
+ |CAST(try_aes_decrypt(unbase64(input), key, mode, DEFAULT, ) AS STRING)|
+ +----------------------------------------------------------------------+
+ |Spark |
+ +----------------------------------------------------------------------+
+
+ Example 5: Decrypt data with key.
+
+ >>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([(
...
"83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94",
... "0000111122223333",)],
... ["input", "key"]
... )
- >>> df.select(try_aes_decrypt(unhex(df.input),
df.key).alias('r')).collect()
- [Row(r=bytearray(b'Spark'))]
+ >>> df.select(sf.try_aes_decrypt(
+ ... sf.unhex(df.input), df.key
+ ... ).cast("STRING")).show(truncate=False) # doctest: +SKIP
+ +------------------------------------------------------------------+
+ |CAST(try_aes_decrypt(unhex(input), key, GCM, DEFAULT, ) AS STRING)|
+ +------------------------------------------------------------------+
+ |Spark |
+ +------------------------------------------------------------------+
"""
_mode = lit("GCM") if mode is None else mode
_padding = lit("DEFAULT") if padding is None else padding
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]