eejbyfeldt commented on code in PR #43435:
URL: https://github.com/apache/spark/pull/43435#discussion_r1368213333
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -295,19 +284,53 @@ case class HashPartitioning(expressions: Seq[Expression],
numPartitions: Int)
}
}
- override def createShuffleSpec(distribution: ClusteredDistribution):
ShuffleSpec =
- HashShuffleSpec(this, distribution)
-
/**
* Returns an expression that will produce a valid partition ID(i.e.
non-negative and is less
* than numPartitions) based on hashing expressions.
*/
def partitionIdExpression: Expression = Pmod(new Murmur3Hash(expressions),
Literal(numPartitions))
+}
+
+/**
+ * Represents a partitioning where rows are split up across partitions based
on the hash
+ * of `expressions`. All rows where `expressions` evaluate to the same values
are guaranteed to be
+ * in the same partition.
+ *
+ * Since [[StatefulOpClusteredDistribution]] relies on this partitioning and
Spark requires
+ * stateful operators to retain the same physical partitioning during the
lifetime of the query
+ * (including restart), the result of evaluation on `partitionIdExpression`
must be unchanged
+ * across Spark versions. Violating this requirement may cause silent
correctness issues.
+ */
+case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int)
+ extends HashPartitioningBase {
+
+ override def createShuffleSpec(distribution: ClusteredDistribution):
HashShuffleSpec =
+ HashShuffleSpec(this, distribution)
override protected def withNewChildrenInternal(
newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions
= newChildren)
}
+case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int)
+
+/**
+ * Represents a partitioning where partitions have been coalesced from a
HashPartitioning into
+ * a smaller number of partitions.
+ */
+case class CoalescedHashPartitioning(from: HashPartitioning, partitions:
Seq[CoalescedBoundary])
Review Comment:
I added an override for `stringArgs`. Is that the desired way to address
this?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]