ulysses-you commented on a change in pull request #25464: [SPARK-28746][SQL]
Add partitionby hint for sql queries
URL: https://github.com/apache/spark/pull/25464#discussion_r318427447
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
##########
@@ -143,22 +143,41 @@ object ResolveHints {
object ResolveCoalesceHints extends Rule[LogicalPlan] {
private val COALESCE_HINT_NAMES = Set("COALESCE", "REPARTITION")
+ private def createRepartitionByExpression(
+ numPartitions: Int, parameters: Seq[Any], h: UnresolvedHint):
RepartitionByExpression = {
+ val exprs = parameters.drop(1)
+ val errExprs = exprs.filter(!_.isInstanceOf[UnresolvedAttribute])
+ if (errExprs.nonEmpty) throw new AnalysisException(
+ s"""Invalid type exprs : $errExprs
+ |expects UnresolvedAttribute type
+ """.stripMargin)
+ RepartitionByExpression(
+ exprs.map(_.asInstanceOf[UnresolvedAttribute]), h.child, numPartitions)
+ }
+
def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
case h: UnresolvedHint if
COALESCE_HINT_NAMES.contains(h.name.toUpperCase(Locale.ROOT)) =>
val hintName = h.name.toUpperCase(Locale.ROOT)
val shuffle = hintName match {
case "REPARTITION" => true
case "COALESCE" => false
}
- val numPartitions = h.parameters match {
+
+ h.parameters match {
case Seq(IntegerLiteral(numPartitions)) =>
- numPartitions
+ Repartition(numPartitions, shuffle, h.child)
case Seq(numPartitions: Int) =>
- numPartitions
+ Repartition(numPartitions, shuffle, h.child)
+
+ case param @ Seq(IntegerLiteral(numPartitions), _*) if shuffle =>
+ createRepartitionByExpression(numPartitions, param, h)
+ case param @ Seq(numPartitions: Int, _*) if shuffle =>
+ createRepartitionByExpression(numPartitions, param, h)
+
case _ =>
- throw new AnalysisException(s"$hintName Hint expects a partition
number as parameter")
+ throw new AnalysisException("Repartition hint expects a partition
number " +
Review comment:
The test covering this change is
[here](https://github.com/apache/spark/pull/25464/files#diff-c3ddf7b5305e8fd629f36dac45318636R191).
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]