Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/21857#discussion_r205565280
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
---
@@ -1400,13 +1401,71 @@ object ReplaceIntersectWithSemiJoin extends Rule[LogicalPlan] {
*/
object ReplaceExceptWithAntiJoin extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
- case Except(left, right) =>
+ case Except(left, right, false) =>
assert(left.output.size == right.output.size)
val joinCond = left.output.zip(right.output).map { case (l, r) =>
EqualNullSafe(l, r) }
Distinct(Join(left, right, LeftAnti, joinCond.reduceLeftOption(And)))
}
}
+/**
+ * Replaces logical [[ExceptAll]] operator using a combination of Union, Aggregate
+ * and Generate operator.
+ *
+ * Input Query :
+ * {{{
+ * SELECT c1 FROM ut1 EXCEPT ALL SELECT c1 FROM ut2
+ * }}}
+ *
+ * Rewritten Query:
+ * {{{
+ * SELECT c1
+ * FROM (
+ * SELECT replicate_rows(sum_val, c1) AS (sum_val, c1)
--- End diff --
So I think here it should be `replicate_rows(sum_val, c1) AS c1`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]