This is an automated email from the ASF dual-hosted git repository.

andygrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 107a61131f feat: route aes_encrypt / aes_decrypt / try_aes_decrypt through codegen dispatcher (#4557)
107a61131f is described below

commit 107a61131fc83f84c368e8f238854f0e1499c403
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jun 4 15:58:28 2026 -0600

    feat: route aes_encrypt / aes_decrypt / try_aes_decrypt through codegen dispatcher (#4557)
---
 docs/source/user-guide/latest/expressions.md       |  6 +-
 .../org/apache/comet/serde/QueryPlanSerde.scala    |  1 +
 .../scala/org/apache/comet/serde/statics.scala     | 12 +++-
 .../resources/sql-tests/expressions/misc/aes.sql   | 69 ++++++++++++++++++++++
 .../sql-tests/expressions/misc/aes_cbc.sql         | 35 +++++++++++
 .../sql-tests/expressions/misc/aes_try_decrypt.sql | 48 +++++++++++++++
 6 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/docs/source/user-guide/latest/expressions.md 
b/docs/source/user-guide/latest/expressions.md
index b5ffd40161..ed190bebb9 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -466,8 +466,8 @@ All higher-order functions are planned via 
[#4224](https://github.com/apache/dat
 
 | Function | Status | Notes |
 | --- | --- | --- |
-| `aes_decrypt` | 🔜 | Falls back; `StaticInvoke` not allowlisted; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
-| `aes_encrypt` | 🔜 | Falls back; planned via codegen dispatch ([#4558](https://github.com/apache/datafusion-comet/issues/4558)); nondeterministic IV by default |
+| `aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher |
+| `aes_encrypt` | ✅ | Routed through the JVM codegen dispatcher; nondeterministic IV by default |
 | `assert_true` | 🔜 | Lowers to `RaiseError`, which falls back |
 | `current_catalog` | ✅ | Resolved to a literal by the analyzer 
(`ReplaceCurrentLike`) |
 | `current_database` | ✅ | Resolved to a literal by the analyzer 
(`ReplaceCurrentLike`) |
@@ -485,7 +485,7 @@ All higher-order functions are planned via 
[#4224](https://github.com/apache/dat
 | `session_user` | ✅ | Alias of `current_user`; resolved to a literal by the 
analyzer |
 | `spark_partition_id` | ✅ |  |
 | `to_variant_object` | 🔜 | tracking 
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
-| `try_aes_decrypt` | 🔜 | Falls back; planned via codegen dispatch 
([#4558](https://github.com/apache/datafusion-comet/issues/4558)) |
+| `try_aes_decrypt` | ✅ | Routed through the JVM codegen dispatcher |
 | `try_parse_json` | 🔜 | tracking 
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
 | `try_variant_get` | 🔜 | tracking 
[#4098](https://github.com/apache/datafusion-comet/issues/4098) |
 | `typeof` | ✅ | Foldable; resolved to a literal before Comet sees the plan |
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala 
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 385528a2eb..47525f1e4a 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -285,6 +285,7 @@ object QueryPlanSerde extends Logging with CometExprShim 
with CometTypeShim {
     classOf[SparkPartitionID] -> CometSparkPartitionId,
     classOf[SortOrder] -> CometSortOrder,
     classOf[StaticInvoke] -> CometStaticInvoke,
+    classOf[TryEval] -> CometTryEval,
     classOf[UnscaledValue] -> CometUnscaledValue)
 
   /**
diff --git a/spark/src/main/scala/org/apache/comet/serde/statics.scala 
b/spark/src/main/scala/org/apache/comet/serde/statics.scala
index b2a4b991d1..33817b7383 100644
--- a/spark/src/main/scala/org/apache/comet/serde/statics.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/statics.scala
@@ -19,7 +19,7 @@
 
 package org.apache.comet.serde
 
-import org.apache.spark.sql.catalyst.expressions.{Attribute, 
ExpressionImplUtils, Literal, UrlCodec}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, 
ExpressionImplUtils, Literal, TryEval, UrlCodec}
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils
 
@@ -38,7 +38,9 @@ object CometStaticInvoke extends 
CometExpressionSerde[StaticInvoke] {
         "read_side_padding"),
       ("isLuhnNumber", classOf[ExpressionImplUtils]) -> 
CometScalarFunction("luhn_check"),
       ("encode", UrlCodec.getClass) -> CometUrlEncodeStaticInvoke,
-      ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke)
+      ("decode", UrlCodec.getClass) -> CometUrlDecodeStaticInvoke,
+      ("aesEncrypt", classOf[ExpressionImplUtils]) -> 
CometStaticInvokeCodegenDispatch,
+      ("aesDecrypt", classOf[ExpressionImplUtils]) -> 
CometStaticInvokeCodegenDispatch)
 
   override def convert(
       expr: StaticInvoke,
@@ -83,3 +85,9 @@ object CometUrlDecodeStaticInvoke extends 
CometExpressionSerde[StaticInvoke] {
     optExprWithFallbackReason(optExpr, expr, expr.children: _*)
   }
 }
+
+/** Routes a [[StaticInvoke]] through the JVM codegen dispatcher; used for AES. */
+object CometStaticInvokeCodegenDispatch extends CometCodegenDispatch[StaticInvoke]
+
+/** Routes [[TryEval]] through the JVM codegen dispatcher; used for `try_aes_decrypt`. */
+object CometTryEval extends CometCodegenDispatch[TryEval]
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes.sql 
b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql
new file mode 100644
index 0000000000..6e2cc4fba2
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes.sql
@@ -0,0 +1,69 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Tests for aes_encrypt and aes_decrypt (available since Spark 3.3). They lower to a
+-- StaticInvoke of ExpressionImplUtils.aesEncrypt / aesDecrypt; Comet routes both methods
+-- through the JVM codegen dispatcher (no native lowering).
+-- try_aes_decrypt (Spark 3.5+) is covered in aes_try_decrypt.sql and AES-CBC (Spark 3.5+) in
+-- aes_cbc.sql, both gated with MinSparkVersion.
+
+statement
+CREATE TABLE test_aes(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes VALUES
+  ('hello world', '1234567890abcdef'),
+  ('apache spark', '1234567890abcdef'),
+  ('', '1234567890abcdef'),
+  (NULL, '1234567890abcdef')
+
+-- GCM round-trip (default mode; the IV is nondeterministic, so verify by decrypting rather than comparing ciphertext)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key), key) AS STRING) FROM test_aes
+
+-- GCM round-trip with explicit mode
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'GCM'), key, 'GCM') AS STRING) 
FROM test_aes
+
+-- ECB round-trip (deterministic mode)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) 
FROM test_aes
+
+-- CBC mode is covered separately in aes_cbc.sql (Spark added AES-CBC in 3.5).
+
+-- ECB direct: output is deterministic so we can compare directly to Spark
+query
+SELECT aes_encrypt(data, key, 'ECB') FROM test_aes
+
+-- aes_decrypt on ECB-encrypted column (same query as the ECB round-trip above)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS STRING) 
FROM test_aes
+
+-- literal key and data (all literals, constant folding disabled in test suite)
+query
+SELECT CAST(aes_decrypt(aes_encrypt('hello', '1234567890abcdef', 'ECB'), 
'1234567890abcdef', 'ECB') AS STRING)
+
+query
+SELECT CAST(aes_decrypt(aes_encrypt(NULL, '1234567890abcdef', 'ECB'), 
'1234567890abcdef', 'ECB') AS STRING)
+
+-- 24-byte key
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef12345678', 'ECB'), 
'1234567890abcdef12345678', 'ECB') AS STRING) FROM test_aes
+
+-- 32-byte key
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, '1234567890abcdef1234567890abcdef', 
'ECB'), '1234567890abcdef1234567890abcdef', 'ECB') AS STRING) FROM test_aes
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql 
b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
new file mode 100644
index 0000000000..fb660b9a71
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_cbc.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- AES-CBC round-trip for aes_encrypt / aes_decrypt. Comet routes the underlying StaticInvoke
+-- through the JVM codegen dispatcher. AES-CBC was added to Spark in 3.5 (SPARK-43042) and
+-- throws on 3.4, so this file is gated.
+-- MinSparkVersion: 3.5
+
+statement
+CREATE TABLE test_aes_cbc(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes_cbc VALUES
+  ('hello world', '1234567890abcdef'),
+  ('apache spark', '1234567890abcdef'),
+  ('', '1234567890abcdef'),
+  (NULL, '1234567890abcdef')
+
+-- CBC round-trip (nondeterministic IV, so compare via round-trip rather than 
raw ciphertext)
+query
+SELECT CAST(aes_decrypt(aes_encrypt(data, key, 'CBC'), key, 'CBC') AS STRING) 
FROM test_aes_cbc
diff --git 
a/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql 
b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql
new file mode 100644
index 0000000000..20e1df6971
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/aes_try_decrypt.sql
@@ -0,0 +1,48 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Tests for try_aes_decrypt, which returns NULL on invalid input instead of throwing.
+-- try_aes_decrypt lowers to TryEval(StaticInvoke(aesDecrypt, ...)); Comet routes both the
+-- TryEval wrapper and the inner StaticInvoke through the JVM codegen dispatcher.
+-- try_aes_decrypt was added in Spark 3.5, so this file is gated.
+-- MinSparkVersion: 3.5
+
+statement
+CREATE TABLE test_aes_try(data STRING, key STRING) USING parquet
+
+statement
+INSERT INTO test_aes_try VALUES
+  ('hello world', '1234567890abcdef'),
+  ('apache spark', '1234567890abcdef'),
+  ('', '1234567890abcdef'),
+  (NULL, '1234567890abcdef')
+
+-- invalid ciphertext returns NULL instead of throwing
+query
+SELECT try_aes_decrypt(CAST('garbage' AS BINARY), key) FROM test_aes_try
+
+-- valid ciphertext decrypts correctly
+query
+SELECT CAST(try_aes_decrypt(aes_encrypt(data, key, 'ECB'), key, 'ECB') AS 
STRING) FROM test_aes_try
+
+-- literal invalid ciphertext
+query
+SELECT try_aes_decrypt(CAST('not_valid_ciphertext' AS BINARY), 
'1234567890abcdef')
+
+-- NULL data
+query
+SELECT try_aes_decrypt(NULL, '1234567890abcdef')


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to