This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new fdeb6db [SPARK-26402][SQL] Accessing nested fields with different cases in case insensitive mode
fdeb6db is described below
commit fdeb6db4053a217be3082da9da105c36b9acaa7c
Author: DB Tsai <[email protected]>
AuthorDate: Sat Dec 22 10:35:14 2018 -0800
[SPARK-26402][SQL] Accessing nested fields with different cases in case insensitive mode
## What changes were proposed in this pull request?
`GetStructField` expressions with different optional names should be semantically equal.
We will use this as a building block to compare the nested fields used in the
plans to be optimized by the Catalyst optimizer.
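For illustration, a minimal sketch of the intended behavior (mirroring the new `CanonicalizeSuite` test; the variable names here are illustrative only):
```
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetStructField}
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

// The same struct attribute accessed through GetStructField expressions that
// differ only in the optional name should be semantically equal once
// Canonicalize strips the names.
val structType = StructType(StructField("a", IntegerType) :: Nil)
val data = AttributeReference("data", structType)()
val lower = GetStructField(data, 0, Some("a"))
val upper = GetStructField(data, 0, Some("A"))
assert(lower.semanticEquals(upper))
```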
This PR also fixes a bug where accessing nested fields with different cases in
case-insensitive mode results in an `AnalysisException`:
```
sql("create table t (s struct<i: Int>) using json")
sql("select s.I from t group by s.i")
```
which currently fails with
```
org.apache.spark.sql.AnalysisException: expression 'default.t.`s`' is neither present in the group by, nor is it an aggregate function
```
as cloud-fan pointed out.
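After this change, the same query analyzes successfully in case-insensitive mode. A sketch of the behavior exercised by the new `SQLQuerySuite` test (assumes a spark-shell-style session where `spark` and `sql` are in scope):
```
spark.conf.set("spark.sql.caseSensitive", "false")
sql("create table t (s struct<i: Int>) using json")
// s.I and s.i now resolve to the same field, so the aggregation analyzes
// and returns no rows for the empty table.
sql("select s.I from t group by s.i").show()
```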
## How was this patch tested?
New tests are added.
Closes #23353 from dbtsai/nestedEqual.
Lead-authored-by: DB Tsai <[email protected]>
Co-authored-by: DB Tsai <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit a5a24d92bdf6e6a8e33bdc8833bedba033576b4c)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/catalyst/expressions/Canonicalize.scala | 4 ++-
.../catalyst/expressions/CanonicalizeSuite.scala | 29 +++++++++++++++++++++
.../BinaryComparisonSimplificationSuite.scala | 30 ++++++++++++++++++++++
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 19 ++++++++++++++
4 files changed, 81 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
index fe6db8b..4d218b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
@@ -26,6 +26,7 @@ package org.apache.spark.sql.catalyst.expressions
*
* The following rules are applied:
* - Names and nullability hints for [[org.apache.spark.sql.types.DataType]]s are stripped.
+ * - Names for [[GetStructField]] are stripped.
* - Commutative and associative operations ([[Add]] and [[Multiply]]) have their children ordered
*   by `hashCode`.
* - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`.
@@ -37,10 +38,11 @@ object Canonicalize {
expressionReorder(ignoreNamesTypes(e))
}
- /** Remove names and nullability from types. */
+ /** Remove names and nullability from types, and names from `GetStructField`. */
private[expressions] def ignoreNamesTypes(e: Expression): Expression = e match {
case a: AttributeReference =>
AttributeReference("none", a.dataType.asNullable)(exprId = a.exprId)
+ case GetStructField(child, ordinal, Some(_)) => GetStructField(child, ordinal, None)
case _ => e
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
index 28e6940..9802a6e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.Range
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
class CanonicalizeSuite extends SparkFunSuite {
@@ -50,4 +51,32 @@ class CanonicalizeSuite extends SparkFunSuite {
assert(range.where(arrays1).sameResult(range.where(arrays2)))
assert(!range.where(arrays1).sameResult(range.where(arrays3)))
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ val expId = NamedExpression.newExprId
+ val qualifier = Seq.empty[String]
+ val structType = StructType(
+ StructField("a", StructType(StructField("b", IntegerType, false) ::
Nil), false) :: Nil)
+
+ // GetStructField with different names are semantically equal
+ val fieldA1 = GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1"))
+ val fieldA2 = GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2"))
+ assert(fieldA1.semanticEquals(fieldA2))
+
+ val fieldB1 = GetStructField(
+ GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1")),
+ 0, Some("b1"))
+ val fieldB2 = GetStructField(
+ GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2")),
+ 0, Some("b2"))
+ assert(fieldB1.semanticEquals(fieldB2))
+ }
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
index a313681..5794691 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper {
@@ -92,4 +93,33 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper
val correctAnswer = nonNullableRelation.analyze
comparePlans(actual, correctAnswer)
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ val expId = NamedExpression.newExprId
+ val qualifier = Seq.empty[String]
+ val structType = StructType(
+ StructField("a", StructType(StructField("b", IntegerType, false) ::
Nil), false) :: Nil)
+
+ val fieldA1 = GetStructField(
+ GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1")),
+ 0, Some("b1"))
+ val fieldA2 = GetStructField(
+ GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2")),
+ 0, Some("b2"))
+
+ // GetStructField with different names are semantically equal; thus, `EqualTo(fieldA1, fieldA2)`
+ // will be optimized to `TrueLiteral` by `SimplifyBinaryComparison`.
+ val originalQuery = nonNullableRelation
+ .where(EqualTo(fieldA1, fieldA2))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery)
+ val correctAnswer = nonNullableRelation.analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index beb1753..806f0b2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2947,6 +2947,25 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
}
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ val msg = intercept[AnalysisException] {
+ withTable("t") {
+ sql("create table t (s struct<i: Int>) using json")
+ checkAnswer(sql("select s.I from t group by s.i"), Nil)
+ }
+ }.message
+ assert(msg.contains("No such struct field I in i"))
+ }
+
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ withTable("t") {
+ sql("create table t (s struct<i: Int>) using json")
+ checkAnswer(sql("select s.I from t group by s.i"), Nil)
+ }
+ }
+ }
}
case class Foo(bar: Option[String])