Repository: spark
Updated Branches:
  refs/heads/branch-2.0 0caab3ef2 -> e67ce4837


[SPARK-17251][SQL] Improve `OuterReference` to be `NamedExpression`

## What changes were proposed in this pull request?

Currently, `OuterReference` is not `NamedExpression`. So, it raises 
'ClassCastException` when it used in projection lists of IN correlated 
subqueries. This PR aims to support that by making `OuterReference` as 
`NamedExpression` to show correct error messages.

```scala
scala> sql("CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES 1, 2 AS t1(a)")
scala> sql("CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES 1 AS t2(b)")
scala> sql("SELECT a FROM t1 WHERE a IN (SELECT a FROM t2)").show
java.lang.ClassCastException: 
org.apache.spark.sql.catalyst.expressions.OuterReference cannot be cast to 
org.apache.spark.sql.catalyst.expressions.NamedExpression
```

## How was this patch tested?

Pass the Jenkins test with new test cases.

Author: Dongjoon Hyun <[email protected]>

Closes #16015 from dongjoon-hyun/SPARK-17251-2.

(cherry picked from commit 9c03c564605783d8e94f6795432bb59c33933e52)
Signed-off-by: Herman van Hovell <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e67ce483
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e67ce483
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e67ce483

Branch: refs/heads/branch-2.0
Commit: e67ce483724b09912b8e1c1df459237481c0e2d3
Parents: 0caab3e
Author: Dongjoon Hyun <[email protected]>
Authored: Sat Nov 26 14:57:48 2016 -0800
Committer: Herman van Hovell <[email protected]>
Committed: Sat Nov 26 14:58:36 2016 -0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  3 +-
 .../catalyst/expressions/namedExpressions.scala |  9 +++-
 .../analysis/ResolveSubquerySuite.scala         | 43 ++++++++++++++++++++
 3 files changed, 53 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e67ce483/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index fc97333..fbc4a95 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -966,7 +966,8 @@ class Analyzer(
       def failOnOuterReference(p: LogicalPlan): Unit = {
         if (p.expressions.exists(containsOuter)) {
           failAnalysis(
-            s"Correlated predicates are not supported outside of WHERE/HAVING 
clauses: $p")
+            "Expressions referencing the outer query are not supported outside 
of WHERE/HAVING " +
+              s"clauses: $p")
         }
       }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e67ce483/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index 306a99d..b2c9e9b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -341,10 +341,17 @@ case class PrettyAttribute(
  * A place holder used to hold a reference that has been resolved to a field 
outside of the current
  * plan. This is used for correlated subqueries.
  */
-case class OuterReference(e: NamedExpression) extends LeafExpression with 
Unevaluable {
+case class OuterReference(e: NamedExpression)
+  extends LeafExpression with NamedExpression with Unevaluable {
   override def dataType: DataType = e.dataType
   override def nullable: Boolean = e.nullable
   override def prettyName: String = "outer"
+
+  override def name: String = e.name
+  override def qualifier: Option[String] = e.qualifier
+  override def exprId: ExprId = e.exprId
+  override def toAttribute: Attribute = e.toAttribute
+  override def newInstance(): NamedExpression = OuterReference(e.newInstance())
 }
 
 object VirtualColumn {

http://git-wip-us.apache.org/repos/asf/spark/blob/e67ce483/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
new file mode 100644
index 0000000..4aafb2b
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.expressions.{In, ListQuery, 
OuterReference}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, 
Project}
+
+/**
+ * Unit tests for [[ResolveSubquery]].
+ */
+class ResolveSubquerySuite extends AnalysisTest {
+
+  val a = 'a.int
+  val b = 'b.int
+  val t1 = LocalRelation(a)
+  val t2 = LocalRelation(b)
+
+  test("SPARK-17251 Improve `OuterReference` to be `NamedExpression`") {
+    val expr = Filter(In(a, Seq(ListQuery(Project(Seq(OuterReference(a)), 
t2)))), t1)
+    val m = intercept[AnalysisException] {
+      SimpleAnalyzer.ResolveSubquery(expr)
+    }.getMessage
+    assert(m.contains(
+      "Expressions referencing the outer query are not supported outside of 
WHERE/HAVING clauses"))
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to