viirya commented on code in PR #178:
URL:
https://github.com/apache/arrow-datafusion-comet/pull/178#discussion_r1518632003
##########
spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala:
##########
@@ -1836,6 +1838,62 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde {
}
}
+ case join: SortMergeJoinExec if isCometOperatorEnabled(op.conf,
"sort_merge_join") =>
+ // `requiredOrders` and `getKeyOrdering` are copied from Spark's
SortMergeJoinExec.
+ def requiredOrders(keys: Seq[Expression]): Seq[SortOrder] = {
+ keys.map(SortOrder(_, Ascending))
+ }
+
+ def getKeyOrdering(
+ keys: Seq[Expression],
+ childOutputOrdering: Seq[SortOrder]): Seq[SortOrder] = {
+ val requiredOrdering = requiredOrders(keys)
+ if (SortOrder.orderingSatisfies(childOutputOrdering,
requiredOrdering)) {
+ keys.zip(childOutputOrdering).map { case (key, childOrder) =>
+ val sameOrderExpressionsSet = ExpressionSet(childOrder.children) - key
+ SortOrder(key, Ascending, sameOrderExpressionsSet.toSeq)
+ }
+ } else {
+ requiredOrdering
+ }
+ }
+
+ // TODO: Support SortMergeJoin with join condition after new
DataFusion release
+ if (join.condition.isDefined) {
+ return None
+ }
Review Comment:
I haven't added join filter support in this PR. I will do it in a follow-up.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]