This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fb1ee25a89e [SPARK-43290][SQL] Adds support for aes_encrypt IVs and AAD fb1ee25a89e is described below commit fb1ee25a89e8b42178b7f55718859ab5117c2320 Author: Steve Weis <steve.w...@databricks.com> AuthorDate: Fri Jun 16 15:42:05 2023 +0300 [SPARK-43290][SQL] Adds support for aes_encrypt IVs and AAD ### What changes were proposed in this pull request? This change adds support for user-provided initialization vectors (IVs) or additional authenticated data (AAD) to `aes_encrypt` / `aes_decrypt`. 12-byte IVs may optionally be passed if the mode is "GCM" and 16-byte IVs may be passed if the mode is "CBC". An arbitrary binary value may be passed as additional authenticated data only if "GCM" mode is used. ### Why are the changes needed? Callers may wish to provide their own IV values so that the output ciphertext matches a ciphertext generated outside of Spark. AAD is used to bind some input to a ciphertext and ensure that it is presented during decryption -- often used to scope an operation to a specific context. ### Does this PR introduce _any_ user-facing change? Yes, this change introduces two optional parameters to `aes_encrypt` and one optional parameter to `aes_decrypt`: ``` aes_encrypt(expr, key[, mode[, padding[, iv[, aad]]]]) aes_decrypt(expr, key[, mode[, padding[, aad]]]) ``` ### How was this patch tested? ``` build/sbt "sql/test:testOnly org.apache.spark.sql.DataFrameFunctionsSuite -- -z aes" ``` Closes #41488 from sweisdb/SPARK-43290. 
Authored-by: Steve Weis <steve.w...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../catalyst/expressions/ExpressionImplUtils.java | 14 +---- .../spark/sql/catalyst/expressions/misc.scala | 64 +++++++++++++++++----- .../expressions/ExpressionImplUtilsSuite.scala | 23 +++++++- .../sql-functions/sql-expression-schema.md | 6 +- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 50 +++++++++++++++++ 5 files changed, 127 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index 6aae649718a..a604e6bf225 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -111,14 +111,6 @@ public class ExpressionImplUtils { return checkSum % 10 == 0; } - public static byte[] aesEncrypt(byte[] input, byte[] key, UTF8String mode, UTF8String padding) { - return aesEncrypt(input, key, mode, padding, null, null); - } - - public static byte[] aesDecrypt(byte[] input, byte[] key, UTF8String mode, UTF8String padding) { - return aesDecrypt(input, key, mode, padding, null); - } - public static byte[] aesEncrypt(byte[] input, byte[] key, UTF8String mode, @@ -192,7 +184,7 @@ public class ExpressionImplUtils { Cipher cipher = Cipher.getInstance(cipherMode.transformation); if (opmode == Cipher.ENCRYPT_MODE) { // This may be 0-length for ECB - if (iv == null) { + if (iv == null || iv.length == 0) { iv = generateIv(cipherMode); } else if (!cipherMode.usesSpec) { // If the caller passes an IV, ensure the mode actually uses it. 
@@ -210,7 +202,7 @@ public class ExpressionImplUtils { } // If the cipher mode supports additional authenticated data and it is provided, update it - if (aad != null) { + if (aad != null && aad.length != 0) { if (cipherMode.supportsAad != true) { throw QueryExecutionErrors.aesUnsupportedAad(mode); } @@ -231,7 +223,7 @@ public class ExpressionImplUtils { if (cipherMode.usesSpec) { AlgorithmParameterSpec algSpec = getParamSpec(cipherMode, input); cipher.init(opmode, secretKey, algSpec); - if (aad != null) { + if (aad != null && aad.length != 0) { if (cipherMode.supportsAad != true) { throw QueryExecutionErrors.aesUnsupportedAad(mode); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 67328cde71a..92ed0843521 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -312,8 +312,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable { // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(expr, key[, mode[, padding]]) - Returns an encrypted value of `expr` using AES in given `mode` with the specified `padding`. + _FUNC_(expr, key[, mode[, padding[, iv[, aad]]]]) - Returns an encrypted value of `expr` using AES in given `mode` with the specified `padding`. Key lengths of 16, 24 and 32 bits are supported. Supported combinations of (`mode`, `padding`) are ('ECB', 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS'). + Optional initialization vectors (IVs) are only supported for CBC and GCM modes. These must be 16 bytes for CBC and 12 bytes for GCM. If not provided, a random vector will be generated and prepended to the output. + Optional additional authenticated data (AAD) is only supported for GCM. If provided for encryption, the identical AAD value must be provided for decryption. 
The default mode is GCM. """, arguments = """ @@ -324,6 +326,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable { Valid modes: ECB, GCM, CBC. * padding - Specifies how to pad messages whose length is not a multiple of the block size. Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC. + * iv - Optional initialization vector. Only supported for CBC and GCM modes. + Valid values: None or ''. 16-byte array for CBC mode. 12-byte array for GCM mode. + * aad - Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and + must be provided for both encryption and decryption. """, examples = """ Examples: @@ -335,6 +341,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable { 3lmwu+Mw0H3fi5NDvcu9lg== > SELECT base64(_FUNC_('Apache Spark', '1234567890abcdef', 'CBC', 'DEFAULT')); 2NYmDCjgXTbbxGA3/SnJEfFC/JQ7olk2VQWReIAAFKo= + > SELECT base64(_FUNC_('Spark', 'abcdefghijklmnop12345678ABCDEFGH', 'CBC', 'DEFAULT', unhex('00000000000000000000000000000000'))); + AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg= + > SELECT base64(_FUNC_('Spark', 'abcdefghijklmnop12345678ABCDEFGH', 'GCM', 'DEFAULT', unhex('000000000000000000000000'), 'This is an AAD mixed into the input')); + AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4 """, since = "3.3.0", group = "misc_funcs") @@ -342,16 +352,22 @@ case class AesEncrypt( input: Expression, key: Expression, mode: Expression, - padding: Expression) + padding: Expression, + iv: Expression, + aad: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes { override lazy val replacement: Expression = StaticInvoke( classOf[ExpressionImplUtils], BinaryType, "aesEncrypt", - Seq(input, key, mode, padding), + Seq(input, key, mode, padding, iv, aad), inputTypes) + def this(input: Expression, key: Expression, mode: Expression, padding: Expression, iv: Expression) = + this(input, key, mode, padding, iv, Literal("")) + def 
this(input: Expression, key: Expression, mode: Expression, padding: Expression) = + this(input, key, mode, padding, Literal("")) def this(input: Expression, key: Expression, mode: Expression) = this(input, key, mode, Literal("DEFAULT")) def this(input: Expression, key: Expression) = @@ -359,13 +375,14 @@ case class AesEncrypt( override def prettyName: String = "aes_encrypt" - override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, BinaryType, StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType) - override def children: Seq[Expression] = Seq(input, key, mode, padding) + override def children: Seq[Expression] = Seq(input, key, mode, padding, iv, aad) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): Expression = { - copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3)) + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3), newChildren(4), newChildren(5)) } } @@ -378,8 +395,9 @@ case class AesEncrypt( */ @ExpressionDescription( usage = """ - _FUNC_(expr, key[, mode[, padding]]) - Returns a decrypted value of `expr` using AES in `mode` with `padding`. + _FUNC_(expr, key[, mode[, padding[, aad]]]) - Returns a decrypted value of `expr` using AES in `mode` with `padding`. Key lengths of 16, 24 and 32 bits are supported. Supported combinations of (`mode`, `padding`) are ('ECB', 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS'). + Optional additional authenticated data (AAD) is only supported for GCM. If provided for encryption, the identical AAD value must be provided for decryption. The default mode is GCM. """, arguments = """ @@ -390,6 +408,8 @@ case class AesEncrypt( Valid modes: ECB, GCM, CBC. * padding - Specifies how to pad messages whose length is not a multiple of the block size. Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC. 
+ * aad - Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and + must be provided for both encryption and decryption. """, examples = """ Examples: @@ -401,6 +421,10 @@ case class AesEncrypt( Spark SQL > SELECT _FUNC_(unbase64('2NYmDCjgXTbbxGA3/SnJEfFC/JQ7olk2VQWReIAAFKo='), '1234567890abcdef', 'CBC'); Apache Spark + > SELECT _FUNC_(unbase64('AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg='), 'abcdefghijklmnop12345678ABCDEFGH', 'CBC', 'DEFAULT'); + Spark + > SELECT _FUNC_(unbase64('AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4'), 'abcdefghijklmnop12345678ABCDEFGH', 'GCM', 'DEFAULT', 'This is an AAD mixed into the input'); + Spark """, since = "3.3.0", group = "misc_funcs") @@ -408,37 +432,40 @@ case class AesDecrypt( input: Expression, key: Expression, mode: Expression, - padding: Expression) + padding: Expression, + aad: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes { override lazy val replacement: Expression = StaticInvoke( classOf[ExpressionImplUtils], BinaryType, "aesDecrypt", - Seq(input, key, mode, padding), + Seq(input, key, mode, padding, aad), inputTypes) + def this(input: Expression, key: Expression, mode: Expression, padding: Expression) = + this(input, key, mode, padding, Literal("")) def this(input: Expression, key: Expression, mode: Expression) = this(input, key, mode, Literal("DEFAULT")) def this(input: Expression, key: Expression) = this(input, key, Literal("GCM")) override def inputTypes: Seq[AbstractDataType] = { - Seq(BinaryType, BinaryType, StringType, StringType) + Seq(BinaryType, BinaryType, StringType, StringType, BinaryType) } override def prettyName: String = "aes_decrypt" - override def children: Seq[Expression] = Seq(input, key, mode, padding) + override def children: Seq[Expression] = Seq(input, key, mode, padding, aad) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): Expression = { - copy(newChildren(0), newChildren(1), 
newChildren(2), newChildren(3)) + copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3), newChildren(4)) } } @ExpressionDescription( - usage = "_FUNC_(expr, key[, mode[, padding]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.", + usage = "_FUNC_(expr, key[, mode[, padding[, aad]]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.", examples = """ Examples: > SELECT _FUNC_(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM'); @@ -454,10 +481,17 @@ case class TryAesDecrypt( key: Expression, mode: Expression, padding: Expression, + aad: Expression, replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules { + def this(input: Expression, + key: Expression, + mode: Expression, + padding: Expression, + aad: Expression) = + this(input, key, mode, padding, aad, TryEval(AesDecrypt(input, key, mode, padding, aad))) def this(input: Expression, key: Expression, mode: Expression, padding: Expression) = - this(input, key, mode, padding, TryEval(AesDecrypt(input, key, mode, padding))) + this(input, key, mode, padding, Literal("")) def this(input: Expression, key: Expression, mode: Expression) = this(input, key, mode, Literal("DEFAULT")) def this(input: Expression, key: Expression) = @@ -465,7 +499,7 @@ case class TryAesDecrypt( override def prettyName: String = "try_aes_decrypt" - override def parameters: Seq[Expression] = Seq(input, key, mode, padding) + override def parameters: Seq[Expression] = Seq(input, key, mode, padding, aad) override protected def withNewChildInternal(newChild: Expression): Expression = this.copy(replacement = newChild) diff --git 
a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala index 52258156e31..3b0dd82c173 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala @@ -34,11 +34,16 @@ class ExpressionImplUtilsSuite extends SparkFunSuite { aadOpt: Option[String] = None, expectedErrorClassOpt: Option[String] = None, errorParamsMap: Map[String, String] = Map()) { + + def isIvDefined: Boolean = { + ivHexOpt.isDefined && ivHexOpt.get != null && ivHexOpt.get.length > 0 + } + val plaintextBytes: Array[Byte] = plaintext.getBytes("UTF-8") val keyBytes: Array[Byte] = key.getBytes("UTF-8") val utf8mode: UTF8String = UTF8String.fromString(mode) val utf8Padding: UTF8String = UTF8String.fromString(padding) - val deterministic: Boolean = mode.equalsIgnoreCase("ECB") || ivHexOpt.isDefined + val deterministic: Boolean = mode.equalsIgnoreCase("ECB") || isIvDefined val ivBytes: Array[Byte] = ivHexOpt.map({ivHex => Hex.unhex(ivHex.getBytes("UTF-8"))}).getOrElse(null) val aadBytes: Array[Byte] = aadOpt.map({aad => aad.getBytes("UTF-8")}).getOrElse(null) @@ -59,11 +64,27 @@ class ExpressionImplUtilsSuite extends SparkFunSuite { "abcdefghijklmnop12345678ABCDEFGH", "9J3iZbIxnmaG+OIA9Amd+A==", "ECB"), + // Test passing non-null, but empty arrays for IV and AAD + TestCase( + "Spark", + "abcdefghijklmnop12345678ABCDEFGH", + "9J3iZbIxnmaG+OIA9Amd+A==", + "ECB", + ivHexOpt = Some(""), + aadOpt = Some("")), TestCase( "Spark", "abcdefghijklmnop12345678ABCDEFGH", "+MgyzJxhusYVGWCljk7fhhl6C6oUqWmtdqoaG93KvhY=", "CBC"), + // Test passing non-null, but empty arrays for IV and AAD + TestCase( + "Spark", + "abcdefghijklmnop12345678ABCDEFGH", + "+MgyzJxhusYVGWCljk7fhhl6C6oUqWmtdqoaG93KvhY=", + "CBC", + ivHexOpt = 
Some(""), + aadOpt = Some("")), TestCase( "Apache Spark", "1234567890abcdef", diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index eef61195357..32c4c02b1b2 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -7,8 +7,8 @@ | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct<ACOSH(1):double> | | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | | org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct<add_months(2016-08-31, 1):date> | -| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct<aes_decrypt(unhex(83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94), 0000111122223333, GCM, DEFAULT):binary> | -| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct<hex(aes_encrypt(Spark, 0000111122223333, GCM, DEFAULT)):string> | +| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct<aes_decrypt(unhex(83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94), 0000111122223333, GCM, DEFAULT, ):binary> | +| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct<hex(aes_encrypt(Spark, 0000111122223333, GCM, DEFAULT, , )):string> | | org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> | | org.apache.spark.sql.catalyst.expressions.ArrayAggregate 
| aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<aggregate(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> | | org.apache.spark.sql.catalyst.expressions.ArrayAggregate | reduce | SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<reduce(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> | @@ -331,7 +331,7 @@ | org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> | | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> | | org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> | -| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT):binary> | +| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT, ):binary> | | org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> | | 
org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 3), 2):int> | | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 037202de9c9..4d7e8cbb351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -408,6 +408,56 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } } + test("aes IV test function") { + val key32 = "abcdefghijklmnop12345678ABCDEFGH" + val gcmIv = "000000000000000000000000" + val encryptedGcm = "AAAAAAAAAAAAAAAAQiYi+sRNYDAOTjdSEcYBFsAWPL1f" + val cbcIv = "00000000000000000000000000000000" + val encryptedCbc = "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=" + val df1 = Seq("Spark").toDF + Seq( + (key32, encryptedGcm, "GCM", gcmIv), + (key32, encryptedCbc, "CBC", cbcIv)).foreach { + case (key, ciphertext, mode, iv) => + checkAnswer( + df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " + + s"'$key', '$mode', 'DEFAULT') as string)"), + Seq(Row("Spark"))) + checkAnswer( + df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " + + s"binary('$key'), '$mode', 'DEFAULT') as string)"), + Seq(Row("Spark"))) + checkAnswer( + df1.selectExpr( + s"base64(aes_encrypt(value, '$key32', '$mode', 'DEFAULT', unhex('$iv')))"), + Seq(Row(ciphertext))) + } + } + + test("aes IV and AAD test function") { + val key32 = "abcdefghijklmnop12345678ABCDEFGH" + val gcmIv = "000000000000000000000000" + val aad = "This is an AAD mixed into the input" + val encryptedGcm = "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4" + val df1 = Seq("Spark").toDF + Seq( + (key32, encryptedGcm, "GCM", 
gcmIv, aad)).foreach { + case (key, ciphertext, mode, iv, aad) => + checkAnswer( + df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " + + s"'$key', '$mode', 'DEFAULT', '$aad') as string)"), + Seq(Row("Spark"))) + checkAnswer( + df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " + + s"binary('$key'), '$mode', 'DEFAULT', '$aad') as string)"), + Seq(Row("Spark"))) + checkAnswer( + df1.selectExpr( + s"base64(aes_encrypt(value, '$key32', '$mode', 'DEFAULT', unhex('$iv'), '$aad'))"), + Seq(Row(ciphertext))) + } + } + test("misc aes ECB function") { val key16 = "abcdefghijklmnop" val key24 = "abcdefghijklmnop12345678" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org