Github user xuanyuanking commented on a diff in the pull request:
https://github.com/apache/spark/pull/22955#discussion_r232163956
--- Diff:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullOutPythonUDFInJoinConditionSuite.scala
---
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.scalatest.Matchers._
+
+import org.apache.spark.api.python.PythonEvalType
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions.PythonUDF
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation,
LogicalPlan}
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.internal.SQLConf._
+import org.apache.spark.sql.types.BooleanType
+
+class PullOutPythonUDFInJoinConditionSuite extends PlanTest {
+
+ object Optimize extends RuleExecutor[LogicalPlan] {
+ val batches =
+ Batch("Extract PythonUDF From JoinCondition", Once,
+ PullOutPythonUDFInJoinCondition) ::
+ Batch("Check Cartesian Products", Once,
+ CheckCartesianProducts) :: Nil
+ }
+
+ val testRelationLeft = LocalRelation('a.int, 'b.int)
+ val testRelationRight = LocalRelation('c.int, 'd.int)
+
+ // Dummy python UDF for testing. Unable to execute.
+ val pythonUDF = PythonUDF("pythonUDF", null,
+ BooleanType,
+ Seq.empty,
+ PythonEvalType.SQL_BATCHED_UDF,
+ udfDeterministic = true)
+
+ val notSupportJoinTypes = Seq(LeftOuter, RightOuter, FullOuter, LeftAnti)
+
+ test("inner join condition with python udf only") {
+ val query = testRelationLeft.join(
+ testRelationRight,
+ joinType = Inner,
+ condition = Some(pythonUDF))
+ val expected = testRelationLeft.join(
+ testRelationRight,
+ joinType = Inner,
+ condition = None).where(pythonUDF).analyze
+
+ // AnalysisException thrown by CheckCartesianProducts while
spark.sql.crossJoin.enabled=false
+ val exception = the [AnalysisException] thrownBy {
+ Optimize.execute(query.analyze)
+ }
+ assert(exception.message.startsWith("Detected implicit cartesian
product"))
+
+ // pull out the python udf while set spark.sql.crossJoin.enabled=true
+ withSQLConf(CROSS_JOINS_ENABLED.key -> "true") {
+ val optimized = Optimize.execute(query.analyze)
+ comparePlans(optimized, expected)
+ }
+ }
+
+ test("left semi join condition with python udf only") {
+ val query = testRelationLeft.join(
+ testRelationRight,
+ joinType = LeftSemi,
+ condition = Some(pythonUDF))
+ val expected = testRelationLeft.join(
+ testRelationRight,
+ joinType = Inner,
+ condition = None).where(pythonUDF).select('a, 'b).analyze
+
+ // AnalysisException thrown by CheckCartesianProducts while
spark.sql.crossJoin.enabled=false
+ val exception = the [AnalysisException] thrownBy {
+ Optimize.execute(query.analyze)
+ }
+ assert(exception.message.startsWith("Detected implicit cartesian
product"))
+
+ // pull out the python udf while set spark.sql.crossJoin.enabled=true
+ withSQLConf(CROSS_JOINS_ENABLED.key -> "true") {
+ val optimized = Optimize.execute(query.analyze)
+ comparePlans(optimized, expected)
+ }
+ }
+
+ test("python udf with other common condition") {
--- End diff --
Thanks, added more cases in 38b1555.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]