ulysses-you commented on code in PR #43435:
URL: https://github.com/apache/spark/pull/43435#discussion_r1368087357


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -295,19 +284,53 @@ case class HashPartitioning(expressions: Seq[Expression], 
numPartitions: Int)
     }
   }
 
-  override def createShuffleSpec(distribution: ClusteredDistribution): 
ShuffleSpec =
-    HashShuffleSpec(this, distribution)
-
   /**
    * Returns an expression that will produce a valid partition ID(i.e. 
non-negative and is less
    * than numPartitions) based on hashing expressions.
    */
   def partitionIdExpression: Expression = Pmod(new Murmur3Hash(expressions), 
Literal(numPartitions))
+}
+
+/**
+ * Represents a partitioning where rows are split up across partitions based 
on the hash
+ * of `expressions`.  All rows where `expressions` evaluate to the same values 
are guaranteed to be
+ * in the same partition.
+ *
+ * Since [[StatefulOpClusteredDistribution]] relies on this partitioning and 
Spark requires
+ * stateful operators to retain the same physical partitioning during the 
lifetime of the query
+ * (including restart), the result of evaluation on `partitionIdExpression` 
must be unchanged
+ * across Spark versions. Violation of this requirement may bring silent 
correctness issue.
+ */
+case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int)
+  extends HashPartitioningBase {
+
+  override def createShuffleSpec(distribution: ClusteredDistribution): 
HashShuffleSpec =
+    HashShuffleSpec(this, distribution)
 
   override protected def withNewChildrenInternal(
     newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions 
= newChildren)
 }
 
+case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int)
+
+/**
+ * Represents a partitioning where partitions have been coalesced from a 
HashPartitioning into a
+ * fewer number of partitions.
+ */
+case class CoalescedHashPartitioning(from: HashPartitioning, partitions: 
Seq[CoalescedBoundary])

Review Comment:
   We'd better avoid showing the details of `partitions` in the explain output and the UI.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to