Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/22955#discussion_r231840717 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala --- @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.scalatest.Matchers._ + +import org.apache.spark.api.python.PythonEvalType +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.PythonUDF +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.internal.SQLConf._ +import org.apache.spark.sql.types.BooleanType + +class PullOutPythonUDFInJoinConditionSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Extract PythonUDF From JoinCondition", Once, + PullOutPythonUDFInJoinCondition) :: + Batch("Check Cartesian Products", Once, + CheckCartesianProducts) :: Nil + } + + val testRelationLeft = LocalRelation('a.int, 'b.int) + val testRelationRight = LocalRelation('c.int, 'd.int) + + // Dummy python UDF for testing. Unable to execute. 
+ val pythonUDF = PythonUDF("pythonUDF", null, + BooleanType, + Seq.empty, + PythonEvalType.SQL_BATCHED_UDF, + udfDeterministic = true) + + val notSupportJoinTypes = Seq(LeftOuter, RightOuter, FullOuter, LeftAnti) + + test("inner join condition with python udf only") { + val query = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = Some(pythonUDF)) + val expected = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = None).where(pythonUDF).analyze + + // AnalysisException thrown by CheckCartesianProducts while spark.sql.crossJoin.enabled=false + val exception = the [AnalysisException] thrownBy { + Optimize.execute(query.analyze) + } + assert(exception.message.startsWith("Detected implicit cartesian product")) + + // pull out the python udf while set spark.sql.crossJoin.enabled=true + withSQLConf(CROSS_JOINS_ENABLED.key -> "true") { + val optimized = Optimize.execute(query.analyze) + comparePlans(optimized, expected) + } + } + + test("left semi join condition with python udf only") { + val query = testRelationLeft.join( + testRelationRight, + joinType = LeftSemi, + condition = Some(pythonUDF)) + val expected = testRelationLeft.join( + testRelationRight, + joinType = Inner, + condition = None).where(pythonUDF).select('a, 'b).analyze + + // AnalysisException thrown by CheckCartesianProducts while spark.sql.crossJoin.enabled=false + val exception = the [AnalysisException] thrownBy { + Optimize.execute(query.analyze) + } + assert(exception.message.startsWith("Detected implicit cartesian product")) + + // pull out the python udf while set spark.sql.crossJoin.enabled=true + withSQLConf(CROSS_JOINS_ENABLED.key -> "true") { + val optimized = Optimize.execute(query.analyze) + comparePlans(optimized, expected) + } + } + + test("python udf with other common condition") { --- End diff -- Shall we add more cases like this one that use `Or` instead of `And`, as well as cases that combine several UDFs with other conditions? Thanks.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org