GitHub user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23057#discussion_r234412635

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala ---
    @@ -119,7 +139,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper {
               // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2) AND B.B3 > 1
               val finalJoinCond = (nullAwareJoinConds ++ conditions).reduceLeft(And)
               // Deduplicate conflicting attributes if any.
    -          dedupJoin(Join(outerPlan, sub, LeftAnti, Option(finalJoinCond)))
    +          dedupJoin(Join(outerPlan, newSub, LeftAnti, Option(finalJoinCond)))
             case (p, predicate) =>
               val (newCond, inputPlan) = rewriteExistentialExpr(Seq(predicate), p)
               Project(p.output, Filter(newCond.get, inputPlan))
    --- End diff --

    Can you try this test case?

    ```scala
    val df1 = spark.sql(
      """
        |SELECT id,num,source FROM (
        |  SELECT id, num, 'a' as source FROM a
        |  UNION ALL
        |  SELECT id, num, 'b' as source FROM b
        |) AS c WHERE c.id IN (SELECT id FROM b WHERE num = 2) OR
        |c.id IN (SELECT id FROM b WHERE num = 3)
      """.stripMargin)
    ```
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org