This is an automated email from the ASF dual-hosted git repository.
andygrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 107a61131f feat: route aes_encrypt / aes_decrypt / try_aes_decrypt
through codegen dispatcher (#4557)
107a61131f is described below
commit 107a61131fc83f84c368e8f238854f0e1499c403
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jun 4 15:58:28 2026 -0600
feat: route aes_encrypt / aes_decrypt / try_aes_decrypt through codegen
dispatcher (#4557)
---
docs/source/user-guide/latest/expressions.md | 6 +-
.../org/apache/comet/serde/QueryPlanSerde.scala | 1 +
.../scala/org/apache/comet/serde/statics.scala | 12 +++-
.../resources/sql-tests/expressions/misc/aes.sql | 69 ++++++++++++++++++++++
.../sql-tests/expressions/misc/aes_cbc.sql | 35 +++++++++++
.../sql-tests/expressions/misc/aes_try_decrypt.sql | 48 +++++++++++++++
6 files changed, 166 insertions(+), 5 deletions(-)
diff --git a/docs/source/user-guide/latest/expressions.md
b/docs/source/user-guide/latest/expressions.md
index b5ffd40161..ed190bebb9 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -466,8 +466,8 @@ All higher-order functions are planned via
[#4224](https://github.com/apache/dat
| Function | Status | Notes |
| --- | --- | --- |
-| `aes_decrypt` | 🔜 | Falls back; `StaticInvoke` not allowlisted; planned via
codegen dispatch
([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
-| `aes_encrypt` | 🔜 | Falls back; planned via codegen dispatch
([#4558](https://github.com/apache/datafusion-comet/issues/4558));
nondeterministic IV by default |
+| `aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher |
+| `aes_encrypt` | ✅ | Routed through the JVM codegen dispatcher;
nondeterministic IV by default |
| `assert_true` | 🔜 | Lowers to `RaiseError`, which falls back |
| `current_catalog` | ✅ | Resolved to a literal by the analyzer
(`ReplaceCurrentLike`) |
| `current_database` | ✅ | Resolved to a literal by the analyzer
(`ReplaceCurrentLike`) |
@@ -485,7 +485,7 @@ All higher-order functions are planned via
[#4224](https://github.com/apache/dat
| `session_user` | ✅ | Alias of `current_user`; resolved to a literal by the
analyzer |
| `spark_partition_id` | ✅ | |
| `to_variant_object` | 🔜 | tracking
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
-| `try_aes_decrypt` | 🔜 | Falls back; planned via codegen dispatch
([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
+| `try_aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher |
| `try_parse_json` | 🔜 | tracking
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
| `try_variant_get` | 🔜 | tracking
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
| `typeof` | ✅ | Foldable; resolved to a literal before Comet sees the plan |
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 385528a2eb..47525f1e4a 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -285,6 +285,7 @@ object QueryPlanSerde extends Logging with CometExprShim
with CometTypeShim {
classOf[SparkPartitionID] -> CometSparkPartitionId,
classOf[SortOrder] -> CometSortOrder,
classOf[StaticInvoke] -> CometStaticInvoke,
+ classOf[TryEval] -> CometTryEval,
classOf[UnscaledValue] -> CometUnscaledValue)
/**
diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala
b/spark/src/main/scala/org/apache/comet/serde/statics.scala
index b2a4b991d1..33817b7383 100644
--- a/spark/src/main/scala/org/apache/comet/serde/statics.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala
@@ -19,7 +19,7 @@
package org.apache.comet.serde
-import org.apache.spark.sql.catalyst.expressions.{Attribute,
ExpressionImplUtils, Literal, UrlCodec}
+import org.apache.spark.sql.catalyst.expressions.{Attribute,
ExpressionImplUtils, Literal, TryEval, UrlCodec}
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils
@@ -38,7 +38,9 @@ object CometStaticInvoke extends
CometExpressionSerde[StaticInvoke] {
"read_side_padding"),
("isLuhnNumber", classOf[ExpressionImplUtils]) ->
CometScalarFunction("luhn_check"),
("encode", UrlCodec.getClass) -> CometUrlEncodeStaticInvoke,
- ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke)
+ ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke,
+ ("aesEncrypt", classOf[ExpressionImplUtils]) ->
CometStaticInvokeCodegenDispatch,
+ ("aesDecrypt", classOf[ExpressionImplUtils]) ->
CometStaticInvokeCodegenDispatch)
override def convert(
expr: StaticInvoke,
@@ -83,3 +85,9 @@ object CometUrlDecodeStaticInvoke extends
CometExpressionSerde[StaticInvoke] {
optExprWithFallbackReason(optExpr, expr, expr.children: _*)
}
}
+
+/** Routes a [[StaticInvoke]] through the JVM codegen dispatcher; used for AES. */
+object CometStaticInvokeCodegenDispatch extends
CometCodegenDispatch[StaticInvoke]
+
+/** Routes [[TryEval]] through the JVM codegen dispatcher; used for `try_aes_decrypt`. */
+object CometTryEval extends CometCodegenDispatch[TryEval]
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes.sql
b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql
new file mode 100644
index 0000000000..6e2cc4fba2
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql
@@ -0,0 +1,69 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Tests for aes_encrypt and aes_decrypt (available since Spark 3.3). They lower to a
+-- StaticInvoke of ExpressionImplUtils.aesEncrypt / aesDecrypt; Comet routes both methods
+-- through the JVM codegen dispatcher (no native lowering).
+-- try_aes_decrypt (Spark 3.5+) is covered in aes_try_decrypt.sql and AES-CBC (Spark 3.5+) in
+-- aes_cbc.sql, both gated with MinSparkVersion.
+
+statement
+CREATE TABLE test_aes(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes VALUES
+ ('hello world', '1234567890abcdef'),
+ ('apache spark', '1234567890abcdef'),
+ ('', '1234567890abcdef'),
+ (NULL, '1234567890abcdef')
+
+-- GCM round-trip (default mode, nondeterministic IV, test via round-trip)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key), key) AS STRING) FROM test_aes
+
+-- GCM round-trip with explicit mode
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'GCM'), key, 'GCM') AS STRING)
FROM test_aes
+
+-- ECB round-trip (deterministic mode)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING)
FROM test_aes
+
+-- CBC mode is covered separately in aes_cbc.sql (Spark added AES-CBC in 3.5).
+
+-- ECB direct: output is deterministic so we can compare directly to Spark
+query
+SELECT aes_encrypt(data, key, 'ECB') FROM test_aes
+
+-- aes_decrypt on ECB-encrypted column
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING)
FROM test_aes
+
+-- literal key and data (all literals, constant folding disabled in test suite)
+query
+SELECT CAST(aes_decrypt(aes_encrypt('hello', '1234567890abcdef', 'ECB'),
'1234567890abcdef', 'ECB') AS STRING)
+
+query
+SELECT CAST(aes_decrypt(aes_encrypt(NULL, '1234567890abcdef', 'ECB'),
'1234567890abcdef', 'ECB') AS STRING)
+
+-- 24-byte key
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef12345678', 'ECB'),
'1234567890abcdef12345678', 'ECB') AS STRING) FROM test_aes
+
+-- 32-byte key
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef1234567890abcdef',
'ECB'), '1234567890abcdef1234567890abcdef', 'ECB') AS STRING) FROM test_aes
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
new file mode 100644
index 0000000000..fb660b9a71
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- AES-CBC round-trip for aes_encrypt / aes_decrypt. Comet routes the underlying StaticInvoke
+-- through the JVM codegen dispatcher. AES-CBC was added to Spark in 3.5 (SPARK-43042) and
+-- throws on 3.4, so this file is gated.
+-- MinSparkVersion: 3.5
+
+statement
+CREATE TABLE test_aes_cbc(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes_cbc VALUES
+ ('hello world', '1234567890abcdef'),
+ ('apache spark', '1234567890abcdef'),
+ ('', '1234567890abcdef'),
+ (NULL, '1234567890abcdef')
+
+-- CBC round-trip (nondeterministic IV, so compare via round-trip rather than raw ciphertext)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'CBC'), key, 'CBC') AS STRING)
FROM test_aes_cbc
diff --git
a/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql
b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql
new file mode 100644
index 0000000000..20e1df6971
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Tests for try_aes_decrypt, which returns NULL on invalid input instead of throwing.
+-- try_aes_decrypt lowers to TryEval(StaticInvoke(aesDecrypt, ...)); Comet routes both the
+-- TryEval wrapper and the inner StaticInvoke through the JVM codegen dispatcher.
+-- try_aes_decrypt was added in Spark 3.5, so this file is gated.
+-- MinSparkVersion: 3.5
+
+statement
+CREATE TABLE test_aes_try(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes_try VALUES
+ ('hello world', '1234567890abcdef'),
+ ('apache spark', '1234567890abcdef'),
+ ('', '1234567890abcdef'),
+ (NULL, '1234567890abcdef')
+
+-- invalid ciphertext returns NULL instead of throwing
+query
+SELECT try_aes_decrypt(CAST('garbage' AS BINARY), key) FROM test_aes_try
+
+-- valid ciphertext decrypts correctly
+query
+SELECT CAST(try_aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS
STRING) FROM test_aes_try
+
+-- literal invalid ciphertext
+query
+SELECT try_aes_decrypt(CAST('not_valid_ciphertext' AS BINARY),
'1234567890abcdef')
+
+-- NULL data
+query
+SELECT try_aes_decrypt(NULL, '1234567890abcdef')
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]