ulysses-you commented on a change in pull request #32816:
URL: https://github.com/apache/spark/pull/32816#discussion_r696418503
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
##########
@@ -1908,6 +1914,70 @@ class AdaptiveQueryExecSuite
}
}
+ test("SPARK-33832: Support optimize skew join even if introduce extra
shuffle") {
+ withSQLConf(
+ SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
+ SQLConf.ADAPTIVE_OPTIMIZE_SKEWS_IN_REBALANCE_PARTITIONS_ENABLED.key ->
"false",
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+ SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "100",
+ SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "100",
+ SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM.key -> "1",
+ SQLConf.SHUFFLE_PARTITIONS.key -> "10",
+ SQLConf.ADAPTIVE_FORCE_OPTIMIZE_SKEWED_JOIN.key -> "true") {
+ withTempView("skewData1", "skewData2") {
+ spark
+ .range(0, 1000, 1, 10)
+ .selectExpr("id % 3 as key1", "id as value1")
+ .createOrReplaceTempView("skewData1")
+ spark
+ .range(0, 1000, 1, 10)
+ .selectExpr("id % 1 as key2", "id as value2")
+ .createOrReplaceTempView("skewData2")
+
+ // check if optimized skewed join does not satisfy the required
distribution
+ Seq(true, false).foreach { hasRequiredDistribution =>
+ Seq(true, false).foreach { hasPartitionNumber =>
+ val repartition = if (hasRequiredDistribution) {
+ s"/*+ repartition(${ if (hasPartitionNumber) "10," else ""}key1)
*/"
+ } else {
+ ""
+ }
+
+ // check required distribution and extra shuffle
+ val (_, adaptive1) =
+ runAdaptiveAndVerifyResult(s"SELECT $repartition key1 FROM
skewData1 " +
+ s"JOIN skewData2 ON key1 = key2 GROUP BY key1")
+ val shuffles1 = findTopLevelShuffle(adaptive1)
Review comment:
Inlined this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]