Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/22326#discussion_r220586456
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala
---
@@ -100,6 +104,28 @@ class BatchEvalPythonExecSuite extends SparkPlanTest
with SharedSQLContext {
}
assert(qualifiedPlanNodes.size == 1)
}
+
+ test("SPARK-25314: Python UDF refers to the attributes from more than one child " +
+ "in join condition") {
+ def dummyPythonUDFTest(): Unit = {
+ val df = Seq(("Hello", 4)).toDF("a", "b")
+ val df2 = Seq(("Hello", 4)).toDF("c", "d")
+ val joinDF = df.join(df2,
+ dummyPythonUDF(col("a"), col("c")) === dummyPythonUDF(col("d"), col("c")))
+ val qualifiedPlanNodes = joinDF.queryExecution.executedPlan.collect {
+ case b: BatchEvalPythonExec => b
+ }
+ assert(qualifiedPlanNodes.size == 1)
+ }
+ // Test without spark.sql.crossJoin.enabled set
+ val errMsg = intercept[AnalysisException] {
+ dummyPythonUDFTest()
+ }
+ assert(errMsg.getMessage.startsWith("Detected implicit cartesian product"))
+ // Test with spark.sql.crossJoin.enabled=true
+ spark.conf.set("spark.sql.crossJoin.enabled", "true")
--- End diff --
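Please use `withSQLConf` here instead of calling `spark.conf.set` directly, so that `spark.sql.crossJoin.enabled` is restored to its previous value once the block finishes and does not leak into other tests.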
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]