[
https://issues.apache.org/jira/browse/SPARK-34581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yuming Wang updated SPARK-34581:
--------------------------------
Description:
BoundAttribute issue will occur after optimization by BooleanSimplification and
PushFoldableIntoBranches. How to reproduce this issue:
{code:scala}
spark.sql("CREATE TABLE t1 (a INT, b INT) USING parquet")
spark.sql("CREATE TABLE t2 (a INT, b INT) USING parquet")
spark.sql(
"""
|SELECT cnt,
| NOT ( buyer_id ) AS buyer_id2
|FROM (SELECT t1.a IS NOT NULL AS buyer_id,
| Count(*) AS cnt
| FROM t1
| INNER JOIN t2
| ON t1.a = t2.a
| GROUP BY 1) t
|""".stripMargin).collect()
{code}
{noformat}
Couldn't find a#4 in [CASE WHEN isnotnull(a#4) THEN 1 ELSE 2 END#10,count(1)#3L]
java.lang.IllegalStateException: Couldn't find a#4 in [CASE WHEN isnotnull(a#4)
THEN 1 ELSE 2 END#10,count(1)#3L]
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:316)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:316)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:321)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:406)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:242)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:404)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357)
{noformat}
Another case:
{code:scala}
spark.sql(
"""
|SELECT cnt,
| CASE WHEN ( buyer_id = 2 AND cnt > 3 ) THEN 2 ELSE 3 END AS buyer_id2
|FROM (SELECT CASE WHEN t1.a IS NOT NULL THEN 1 ELSE 2 END AS buyer_id, Count(*) AS cnt
| FROM t1 INNER JOIN t2 ON t1.a = t2.a
| GROUP BY 1) t
|""".stripMargin).collect()
{code}
was:
BoundAttribute issue after optimized by BooleanSimplification and
PushFoldableIntoBranches. How to reproduce this issue:
{code:scala}
spark.sql("CREATE TABLE t1 (a INT, b INT) USING parquet")
spark.sql("CREATE TABLE t2 (a INT, b INT) USING parquet")
spark.sql(
"""
|SELECT cnt,
| NOT ( buyer_id ) AS buyer_id2
|FROM (SELECT t1.a IS NOT NULL AS buyer_id,
| Count(*) AS cnt
| FROM t1
| INNER JOIN t2
| ON t1.a = t2.a
| GROUP BY 1) t
|""".stripMargin).collect()
{code}
{noformat}
Couldn't find a#4 in [CASE WHEN isnotnull(a#4) THEN 1 ELSE 2 END#10,count(1)#3L]
java.lang.IllegalStateException: Couldn't find a#4 in [CASE WHEN isnotnull(a#4)
THEN 1 ELSE 2 END#10,count(1)#3L]
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:316)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:316)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:321)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:406)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:242)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:404)
at
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357)
{noformat}
Another case:
{code:scala}
spark.sql(
"""
|SELECT cnt,
| CASE WHEN ( buyer_id = 2 AND cnt > 3 ) THEN 2 ELSE 3 END AS
buyer_id2
|FROM (SELECT CASE WHEN t1.a IS NOT NULL THEN 1 ELSE 2 END AS buyer_id,
Count(*) AS cnt
| FROM t1 INNER JOIN t2 ON t1.a = t2.a
| GROUP BY 1) t
|""".stripMargin).collect()
{code}
> BoundAttribute issue will occur after optimization by BooleanSimplification
> and PushFoldableIntoBranches
> -------------------------------------------------------------------------------------------------------
>
> Key: SPARK-34581
> URL: https://issues.apache.org/jira/browse/SPARK-34581
> Project: Spark
> Issue Type: Sub-task
> Components: SQL
> Affects Versions: 3.2.0
> Reporter: Yuming Wang
> Priority: Major
>
> BoundAttribute issue will occur after optimization by BooleanSimplification
> and PushFoldableIntoBranches. How to reproduce this issue:
> {code:scala}
> spark.sql("CREATE TABLE t1 (a INT, b INT) USING parquet")
> spark.sql("CREATE TABLE t2 (a INT, b INT) USING parquet")
> spark.sql(
> """
> |SELECT cnt,
> | NOT ( buyer_id ) AS buyer_id2
> |FROM (SELECT t1.a IS NOT NULL AS buyer_id,
> | Count(*) AS cnt
> | FROM t1
> | INNER JOIN t2
> | ON t1.a = t2.a
> | GROUP BY 1) t
> |""".stripMargin).collect()
> {code}
> {noformat}
> Couldn't find a#4 in [CASE WHEN isnotnull(a#4) THEN 1 ELSE 2
> END#10,count(1)#3L]
> java.lang.IllegalStateException: Couldn't find a#4 in [CASE WHEN
> isnotnull(a#4) THEN 1 ELSE 2 END#10,count(1)#3L]
> at
> org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80)
> at
> org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:316)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:316)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:321)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:406)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:242)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:404)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:357)
> {noformat}
> Another case:
> {code:scala}
> spark.sql(
> """
> |SELECT cnt,
> | CASE WHEN ( buyer_id = 2 AND cnt > 3 ) THEN 2 ELSE 3 END AS buyer_id2
> |FROM (SELECT CASE WHEN t1.a IS NOT NULL THEN 1 ELSE 2 END AS buyer_id, Count(*) AS cnt
> | FROM t1 INNER JOIN t2 ON t1.a = t2.a
> | GROUP BY 1) t
> |""".stripMargin).collect()
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]