szehon-ho commented on code in PR #42306:
URL: https://github.com/apache/spark/pull/42306#discussion_r1304977700
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -701,41 +705,78 @@ case class KeyGroupedShuffleSpec(
case otherSpec @ KeyGroupedShuffleSpec(otherPartitioning,
otherDistribution) =>
distribution.clustering.length == otherDistribution.clustering.length &&
numPartitions == other.numPartitions && areKeysCompatible(otherSpec) &&
-
partitioning.partitionValues.zip(otherPartitioning.partitionValues).forall {
- case (left, right) =>
- InternalRowComparableWrapper(left, partitioning.expressions)
- .equals(InternalRowComparableWrapper(right,
partitioning.expressions))
- }
+ isPartitioningCompatible(otherPartitioning)
case ShuffleSpecCollection(specs) =>
specs.exists(isCompatibleWith)
case _ => false
}
+ def isPartitioningCompatible(otherPartitioning: KeyGroupedPartitioning):
Boolean = {
+ val clusterKeySize = keyPositions.size
+ partitioning.partitionValues.zip(otherPartitioning.partitionValues)
+ .forall {
+ case (left, right) =>
+ val leftTypes = partitioning.expressions.map(_.dataType)
Review Comment:
Got rid of this actually.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -1500,6 +1500,18 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ val V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS =
+
buildConf("spark.sql.sources.v2.bucketing.allowJoinKeysSubsetOfPartitionKeys.enabled")
+ .doc("Whether to allow storage-partition join in the case where join
keys are " +
+ "a subset of the partition keys of the source tables. At planning
time, " +
+ "Spark will group the partitions by only those keys that are in the
join keys. " +
+ "This is currently enabled only if
spark.sql.sources.v2.bucketing.pushPartValues.enabled " +
+ "is also enabled."
+ )
+ .version("3.5.0")
+ .booleanConf
+ .createWithDefault(true)
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]