This is an automated email from the ASF dual-hosted git repository.
richox pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git
The following commit(s) were added to refs/heads/master by this push:
new b5832e5e [AURON #1961] Fix Spark 4.0+: unit test catalyst codegen
failure due to session artifact isolation (#1971)
b5832e5e is described below
commit b5832e5ead9fbac95e5e11cb621e342670055bb4
Author: yew1eb <[email protected]>
AuthorDate: Fri Jan 30 19:24:02 2026 +0800
[AURON #1961] Fix Spark 4.0+: unit test catalyst codegen failure due to
session artifact isolation (#1971)
<!--
- Start the PR title with the related issue ID, e.g. '[AURON #XXXX]
Short summary...'.
-->
# Which issue does this PR close?
Closes #1961
# Rationale for this change
Spark 4.0+ enables session artifact isolation by default (related to
https://github.com/apache/spark/pull/48120), a feature entirely absent
in Spark 3.x. This mechanism stores Catalyst dynamically generated
inline classes (e.g.,
`org.apache.spark.sql.catalyst.expressions.Object`) in session-specific
isolated directories. The `REPL class server` cannot scan these
directories, causing class lookup failures and breaking Catalyst codegen
in unit tests. Isolation is unnecessary for unit tests (single-session
execution, no class conflict risk), so disabling it is a minimal, safe
fix.
# What changes are included in this PR?
For Spark 4.0+ only, conditionally set
`spark.sql.artifact.isolation.enabled=false` in unit test
configurations. This binds Catalyst dynamic codegen classes to the
default ("default") isolation group, aligning behavior with Spark 3.x.
# Are there any user-facing changes?
No.
# How was this patch tested?
CI
---
.../test/scala/org/apache/auron/AuronFunctionSuite.scala | 8 +-------
.../src/test/scala/org/apache/auron/AuronQuerySuite.scala | 11 +----------
.../test/scala/org/apache/auron/BaseAuronSQLSuite.scala | 15 +++++++++++----
3 files changed, 13 insertions(+), 21 deletions(-)
diff --git
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala
index 5fc20370..0a4c1203 100644
---
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala
+++
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala
@@ -20,7 +20,7 @@ import java.text.SimpleDateFormat
import org.apache.spark.sql.{AuronQueryTest, Row}
-import org.apache.auron.util.{AuronTestUtils, SparkVersionUtil}
+import org.apache.auron.util.AuronTestUtils
class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite {
@@ -83,9 +83,6 @@ class AuronFunctionSuite extends AuronQueryTest with
BaseAuronSQLSuite {
}
test("spark hash function") {
- // TODO: Fix flaky codegen cache failures in SPARK-4.x,
https://github.com/apache/auron/issues/1961
- assume(!SparkVersionUtil.isSparkV40OrGreater)
-
withTable("t1") {
sql("create table t1 using parquet as select array(1, 2) as arr")
val functions =
@@ -97,9 +94,6 @@ class AuronFunctionSuite extends AuronQueryTest with
BaseAuronSQLSuite {
}
test("expm1 function") {
- // TODO: Fix flaky codegen cache failures in SPARK-4.x,
https://github.com/apache/auron/issues/1961
- assume(!SparkVersionUtil.isSparkV40OrGreater)
-
withTable("t1") {
sql("create table t1(c1 double) using parquet")
sql("insert into t1 values(0.0), (1.1), (2.2)")
diff --git
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
index a73d17f2..e82eb78f 100644
---
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
+++
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala
@@ -20,7 +20,7 @@ import org.apache.spark.sql.{AuronQueryTest, Row}
import org.apache.spark.sql.execution.joins.auron.plan.NativeBroadcastJoinExec
import org.apache.auron.spark.configuration.SparkAuronConfiguration
-import org.apache.auron.util.{AuronTestUtils, SparkVersionUtil}
+import org.apache.auron.util.AuronTestUtils
class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with
AuronSQLTestHelper {
import testImplicits._
@@ -42,9 +42,6 @@ class AuronQuerySuite extends AuronQueryTest with
BaseAuronSQLSuite with AuronSQ
}
test("test filter with year function") {
- // TODO: Fix flaky codegen cache failures in SPARK-4.x,
https://github.com/apache/auron/issues/1961
- assume(!SparkVersionUtil.isSparkV40OrGreater)
-
withTable("t1") {
sql("create table t1 using parquet as select '2024-12-18' as event_time")
checkSparkAnswerAndOperator(s"""
@@ -57,9 +54,6 @@ class AuronQuerySuite extends AuronQueryTest with
BaseAuronSQLSuite with AuronSQ
}
test("test select multiple spark ext functions with the same signature") {
- // TODO: Fix flaky codegen cache failures in SPARK-4.x,
https://github.com/apache/auron/issues/1961
- assume(!SparkVersionUtil.isSparkV40OrGreater)
-
withTable("t1") {
sql("create table t1 using parquet as select '2024-12-18' as event_time")
checkSparkAnswerAndOperator("select year(event_time), month(event_time)
from t1")
@@ -177,9 +171,6 @@ class AuronQuerySuite extends AuronQueryTest with
BaseAuronSQLSuite with AuronSQ
}
test("floor function with long input") {
- // TODO: Fix flaky codegen cache failures in SPARK-4.x,
https://github.com/apache/auron/issues/1961
- assume(!SparkVersionUtil.isSparkV40OrGreater)
-
withTable("t1") {
sql("create table t1 using parquet as select 1L as c1, 2.2 as c2")
checkSparkAnswerAndOperator("select floor(c1), floor(c2) from t1")
diff --git
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala
index 587d8f96..a8a9c259 100644
---
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala
+++
b/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala
@@ -22,6 +22,8 @@ import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.auron.util.SparkVersionUtil
+
trait BaseAuronSQLSuite extends SharedSparkSession {
protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse"
@@ -49,7 +51,7 @@ trait BaseAuronSQLSuite extends SharedSparkSession {
}
override protected def sparkConf: SparkConf = {
- super.sparkConf
+ val conf = super.sparkConf
.set("spark.sql.extensions",
"org.apache.spark.sql.auron.AuronSparkSessionExtension")
.set(
"spark.shuffle.manager",
@@ -58,8 +60,13 @@ trait BaseAuronSQLSuite extends SharedSparkSession {
.set("spark.auron.enable", "true")
.set("spark.ui.enabled", "false")
.set("spark.sql.warehouse.dir", warehouseDir)
- // Avoid the code size overflow error in Spark code generation.
- .set("spark.sql.codegen.wholeStage", "false")
- .set("spark.sql.codegen.factoryMode", "NO_CODEGEN")
+
+ if (SparkVersionUtil.isSparkV40OrGreater) {
+ // Spark 4.0+: Disable session artifact isolation, align with Spark 3.x
behavior
+ // Fix Catalyst codegen failure: prevent
org.apache.spark.sql.catalyst.expressions.Object
+ // in isolated dirs from REPL classloader lookup failure
+ conf.set("spark.sql.artifact.isolation.enabled", "false")
+ }
+ conf
}
}