KurtYoung commented on a change in pull request #8499: [FLINK-12575]
[table-planner-blink] Introduce planner rules to remove redundant shuffle and
collation
URL: https://github.com/apache/flink/pull/8499#discussion_r287198770
##########
File path:
flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecHashAggregate.scala
##########
@@ -94,6 +99,53 @@ class BatchExecHashAggregate(
isGlobal = true))
}
+ override def satisfyTraits(requiredTraitSet: RelTraitSet): RelNode = {
+ val requiredDistribution =
requiredTraitSet.getTrait(FlinkRelDistributionTraitDef.INSTANCE)
+ val canSatisfy = requiredDistribution.getType match {
+ case SINGLETON => grouping.length == 0
+ case HASH_DISTRIBUTED =>
+ val shuffleKeys = requiredDistribution.getKeys
+ val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*)
+ if (requiredDistribution.requireStrict) {
+ shuffleKeys == groupKeysList
+ } else if (Util.startsWith(shuffleKeys, groupKeysList)) {
+ // If required distribution is not strict, Hash[a] can satisfy
Hash[a, b].
+ // so return true if shuffleKeys(Hash[a, b]) start with
groupKeys(Hash[a])
+ true
+ } else {
+ // If partialKey is enabled, try to use partial key to satisfy the
required distribution
+ val tableConfig = FlinkRelOptUtil.getTableConfigFromContext(this)
+ val partialKeyEnabled = tableConfig.getConf.getBoolean(
+ PlannerConfigOptions.SQL_OPTIMIZER_SHUFFLE_PARTIAL_KEY_ENABLED)
+ partialKeyEnabled && groupKeysList.containsAll(shuffleKeys)
+ }
+ case _ => false
+ }
+ if (!canSatisfy) {
+ return null
+ }
+
+ val inputRequiredDistribution = requiredDistribution.getType match {
+ case SINGLETON => requiredDistribution
+ case HASH_DISTRIBUTED =>
+ val shuffleKeys = requiredDistribution.getKeys
+ val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*)
+ if (requiredDistribution.requireStrict) {
+ FlinkRelDistribution.hash(grouping, requireStrict = true)
+ } else if (Util.startsWith(shuffleKeys, groupKeysList)) {
+ // Hash[a] can satisfy Hash[a, b]
+ FlinkRelDistribution.hash(grouping, requireStrict = false)
+ } else {
+ // use partial key to satisfy the required distribution
+ FlinkRelDistribution.hash(shuffleKeys.map(grouping(_)).toArray,
requireStrict = false)
Review comment:
Why is `shuffleKeys.map(grouping(_))` used here?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services