This is an automated email from the ASF dual-hosted git repository.
ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 1003f52 [KYUUBI #1321] Add config to control if zorder using global
sort
1003f52 is described below
commit 1003f5296cb3324ecc3a8bf4fa698714d94a45f8
Author: ulysses-you <[email protected]>
AuthorDate: Tue Nov 2 19:17:11 2021 +0800
[KYUUBI #1321] Add config to control if zorder using global sort
<!--
Thanks for sending a pull request!
Here are some tips for you:
1. If this is your first time, please read our contributor guidelines:
https://kyuubi.readthedocs.io/en/latest/community/contributions.html
2. If the PR is related to an issue in
https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your
PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
3. If the PR is unfinished, add '[WIP]' in your PR title, e.g.,
'[WIP][KYUUBI #XXXX] Your PR title ...'.
-->
### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
1. If you add a feature, you can talk about the use case of it.
2. If you fix a bug, you can clarify why it is a bug.
-->
Add a new config `spark.sql.optimizer.zorderGlobalSort.enabled` to control
if we do a global sort using zorder. This is a trade-off with data skew if the
zorder columns has low cardinality.
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run
test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests)
locally before make a pull request
Closes #1321 from ulysses-you/zorder-config.
Closes #1321
422599e4 [ulysses-you] delete table
c11e2f4f [ulysses-you] doc
b984b537 [ulysses-you] Add config to control if zorder using global sort
Authored-by: ulysses-you <[email protected]>
Signed-off-by: ulysses-you <[email protected]>
---
.../org/apache/kyuubi/sql/KyuubiSQLConf.scala | 9 +++++++++
.../sql/zorder/InsertZorderBeforeWritingBase.scala | 2 +-
.../sql/zorder/ZorderSqlAstBuilderBase.scala | 3 ++-
.../scala/org/apache/spark/sql/ZorderSuite.scala | 22 ++++++++++++++++++++++
docs/sql/rules.md | 1 +
5 files changed, 35 insertions(+), 2 deletions(-)
diff --git
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala
index 89b8f2d..b70c039 100644
---
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala
+++
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala
@@ -88,6 +88,15 @@ object KyuubiSQLConf {
.booleanConf
.createWithDefault(true)
+ val ZORDER_GLOBAL_SORT_ENABLED =
+ buildConf("spark.sql.optimizer.zorderGlobalSort.enabled")
+ .doc("When true, we do a global sort using zorder. Note that, it can
cause data skew " +
+ "issue if the zorder columns have less cardinality. When false, we
only do local sort " +
+ "using zorder.")
+ .version("1.4.0")
+ .booleanConf
+ .createWithDefault(true)
+
val WATCHDOG_MAX_HIVEPARTITION =
buildConf("spark.sql.watchdog.maxHivePartitions")
.doc("Add maxHivePartitions Strategy to avoid scan excessive " +
diff --git
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWritingBase.scala
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWritingBase.scala
index e76e81f..81deea0 100644
---
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWritingBase.scala
+++
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWritingBase.scala
@@ -146,7 +146,7 @@ trait InsertZorderHelper extends Rule[LogicalPlan] with
ZorderBuilder {
}
Sort(
SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
- true,
+ conf.getConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED),
plan
)
}
diff --git
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilderBase.scala
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilderBase.scala
index 33ed18a..74e7c45 100644
---
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilderBase.scala
+++
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilderBase.scala
@@ -40,6 +40,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
+import org.apache.kyuubi.sql.KyuubiSQLConf
import
org.apache.kyuubi.sql.zorder.ZorderSqlExtensionsParser.{BigDecimalLiteralContext,
BigIntLiteralContext, BooleanLiteralContext, DecimalLiteralContext,
DoubleLiteralContext, IntegerLiteralContext, LogicalBinaryContext,
MultipartIdentifierContext, NullLiteralContext, NumberContext,
OptimizeZorderContext, PassThroughContext, QueryContext,
SingleStatementContext, SmallIntLiteralContext, StringLiteralContext,
TinyIntLiteralContext, TypeConstructorContext, ZorderClauseContext}
abstract class ZorderSqlAstBuilderBase extends
ZorderSqlExtensionsBaseVisitor[AnyRef] {
@@ -89,7 +90,7 @@ abstract class ZorderSqlAstBuilderBase extends
ZorderSqlExtensionsBaseVisitor[An
val query =
Sort(
SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
- true,
+ conf.getConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED),
Project(Seq(UnresolvedStar(None)), tableWithFilter))
buildOptimizeZorderStatement(tableIdent, query)
diff --git
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
index 47e7839..f3e6085 100644
---
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
+++
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
@@ -537,6 +537,28 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with
ExpressionEvalHelper
}
}
}
+
+ test("Add config to control if zorder using global sort") {
+ withTable("t") {
+ withSQLConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED.key -> "false") {
+ sql(
+ """
+ |CREATE TABLE t (c1 int, c2 string) TBLPROPERTIES (
+ |'kyuubi.zorder.enabled'= 'true',
+ |'kyuubi.zorder.cols'= 'c1,c2')
+ |""".stripMargin)
+ val p1 = sql("OPTIMIZE t ZORDER BY c1, c2").queryExecution.analyzed
+ assert(p1.collect {
+ case shuffle: Sort if !shuffle.global => shuffle
+ }.size == 1)
+
+ val p2 = sql("INSERT INTO TABLE t SELECT * FROM
VALUES(1,'a')").queryExecution.analyzed
+ assert(p2.collect {
+ case shuffle: Sort if !shuffle.global => shuffle
+ }.size == 1)
+ }
+ }
+ }
}
class ZorderWithCodegenEnabledSuite extends ZorderSuite {
diff --git a/docs/sql/rules.md b/docs/sql/rules.md
index 0723ed4..ee7b330 100644
--- a/docs/sql/rules.md
+++ b/docs/sql/rules.md
@@ -75,4 +75,5 @@ spark.sql.optimizer.forceShuffleBeforeJoin.enabled | false |
Ensure shuffle node
spark.sql.optimizer.finalStageConfigIsolation.enabled | false | If true, the
final stage support use different config with previous stage. The prefix of
final stage config key should be `spark.sql.finalStage.`. For example, the raw
spark config: `spark.sql.adaptive.advisoryPartitionSizeInBytes`, then the final
stage config should be:
`spark.sql.finalStage.adaptive.advisoryPartitionSizeInBytes`. | 1.2.0
spark.sql.analyzer.classification.enabled | false | When true, allows Kyuubi
engine to judge this SQL's classification and set
`spark.sql.analyzer.classification` back into sessionConf. Through this
configuration item, Spark can optimizing configuration dynamic. | 1.4.0
spark.sql.optimizer.insertZorderBeforeWriting.enabled | true | When true, we
will follow target table properties to insert zorder or not. The key properties
are: 1) `kyuubi.zorder.enabled`: if this property is true, we will insert
zorder before writing data. 2) `kyuubi.zorder.cols`: string split by comma, we
will zorder by these cols. | 1.4.0
+spark.sql.optimizer.zorderGlobalSort.enabled | true | When true, we do a
global sort using zorder. Note that, it can cause data skew issue if the zorder
columns have less cardinality. When false, we only do local sort using zorder.
| 1.4.0
spark.sql.watchdog.maxHivePartitions | none | Add maxHivePartitions Strategy
to avoid scan excessive hive partitions on partitioned table, it's optional
that works with defined. | 1.4.0