This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fb2a308aca1e [SPARK-54830][SPARK-48037][TESTS][FOLLOWUP] Disable
shuffle checksum for the SPARK-48037 test case to avoid memory issues
fb2a308aca1e is described below
commit fb2a308aca1e7eb2ce6d9de62f5cb659b9750d74
Author: Tengfei Huang <[email protected]>
AuthorDate: Thu Jan 29 10:37:55 2026 +0800
[SPARK-54830][SPARK-48037][TESTS][FOLLOWUP] Disable shuffle checksum for
the SPARK-48037 test case to avoid memory issues
### What changes were proposed in this pull request?
Disable the shuffle checksum for the test case from SPARK-48037 to avoid
memory issues.
Also revert the test-related Java options.
### Why are the changes needed?
Fix tests.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing UT
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #54033 from ivoson/SPARK-54830-test-memory-issue.
Authored-by: Tengfei Huang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
project/SparkBuild.scala | 4 ----
sql/core/pom.xml | 3 +--
.../spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala | 7 ++++++-
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 1cbb58fd5b29..924b4df98a56 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -1322,10 +1322,6 @@ object SqlApi {
object SQL {
import BuildCommons.protoVersion
lazy val settings = Seq(
- // SPARK-54830: avoid AdaptiveQueryExecSuite OOM, since computing order
independent shuffle checksum needs more
- // memory for test case introduced by SPARK-48037 which set shuffle
partition to 16777216
- // It needs to be consistent with the configuration of the
`scalatest-maven-plugin` in `sql/core/pom.xml`.
- (Test / javaOptions) += "-Xmx6g",
// Setting version for the protobuf compiler. This has to be propagated to
every sub-project
// even if the project is not using it.
PB.protocVersion := BuildCommons.protoVersion,
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7f321677ff2f..ab6a8f8182e6 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -344,8 +344,7 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
- <!-- SPARK-54830: Increase `-Xmx` to 6g to prevent OOM during
testing, ensuring consistency with the SQL module configuration in
`SparkBuild.scala`.-->
- <argLine>-ea -Xmx6g -Xss4m
-XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs}
-Dio.netty.tryReflectionSetAccessible=true</argLine>
+ <argLine>-ea -Xmx4g -Xss4m
-XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraJavaTestArgs}
-Dio.netty.tryReflectionSetAccessible=true</argLine>
</configuration>
</plugin>
<plugin>
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
index 3e7d26f74bd4..06ed672d5815 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
@@ -2696,10 +2696,15 @@ class AdaptiveQueryExecSuite
test("SPARK-48037: Fix SortShuffleWriter lacks shuffle write related metrics
" +
"resulting in potentially inaccurate data") {
withTable("t3") {
+ // It would take a lot of extra memory to keep track of the checksums for a large
number of shuffle
+ // partitions, which is 16777216 in this case. Instead of continuing to
increase the test memory in
+ // CI jobs, disable the order-independent shuffle checksum to avoid OOM
during the test.
withSQLConf(
SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
SQLConf.SHUFFLE_PARTITIONS.key -> (SortShuffleManager
- .MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE + 1).toString) {
+ .MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE + 1).toString,
+ SQLConf.SHUFFLE_ORDER_INDEPENDENT_CHECKSUM_ENABLED.key -> "false",
+ SQLConf.SHUFFLE_CHECKSUM_MISMATCH_FULL_RETRY_ENABLED.key -> "false") {
sql("CREATE TABLE t3 USING PARQUET AS SELECT id FROM range(2)")
val (plan, adaptivePlan) = runAdaptiveAndVerifyResult(
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]