asfgit closed pull request #23353: [SPARK-26402][SQL] Accessing nested fields
with different cases in case insensitive mode
URL: https://github.com/apache/spark/pull/23353
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a pull request from a fork once
it has been merged, the diff is reproduced below for the sake of provenance:
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
index fe6db8b344d3d..4d218b936b3a2 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
@@ -26,6 +26,7 @@ package org.apache.spark.sql.catalyst.expressions
*
* The following rules are applied:
* - Names and nullability hints for [[org.apache.spark.sql.types.DataType]]s
are stripped.
+ * - Names for [[GetStructField]] are stripped.
* - Commutative and associative operations ([[Add]] and [[Multiply]]) have
their children ordered
* by `hashCode`.
* - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`.
@@ -37,10 +38,11 @@ object Canonicalize {
expressionReorder(ignoreNamesTypes(e))
}
- /** Remove names and nullability from types. */
+ /** Remove names and nullability from types, and names from
`GetStructField`. */
private[expressions] def ignoreNamesTypes(e: Expression): Expression = e
match {
case a: AttributeReference =>
AttributeReference("none", a.dataType.asNullable)(exprId = a.exprId)
+ case GetStructField(child, ordinal, Some(_)) => GetStructField(child,
ordinal, None)
case _ => e
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
index 28e6940f3cca3..9802a6e5891b8 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.Range
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
class CanonicalizeSuite extends SparkFunSuite {
@@ -50,4 +51,32 @@ class CanonicalizeSuite extends SparkFunSuite {
assert(range.where(arrays1).sameResult(range.where(arrays2)))
assert(!range.where(arrays1).sameResult(range.where(arrays3)))
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ val expId = NamedExpression.newExprId
+ val qualifier = Seq.empty[String]
+ val structType = StructType(
+ StructField("a", StructType(StructField("b", IntegerType, false) ::
Nil), false) :: Nil)
+
+ // GetStructField with different names are semantically equal
+ val fieldA1 = GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1"))
+ val fieldA2 = GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2"))
+ assert(fieldA1.semanticEquals(fieldA2))
+
+ val fieldB1 = GetStructField(
+ GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1")),
+ 0, Some("b1"))
+ val fieldB2 = GetStructField(
+ GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2")),
+ 0, Some("b2"))
+ assert(fieldB1.semanticEquals(fieldB2))
+ }
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
index a313681eeb8f0..5794691a365a9 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
@@ -25,6 +25,7 @@ import
org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
class BinaryComparisonSimplificationSuite extends PlanTest with
PredicateHelper {
@@ -92,4 +93,33 @@ class BinaryComparisonSimplificationSuite extends PlanTest
with PredicateHelper
val correctAnswer = nonNullableRelation.analyze
comparePlans(actual, correctAnswer)
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ val expId = NamedExpression.newExprId
+ val qualifier = Seq.empty[String]
+ val structType = StructType(
+ StructField("a", StructType(StructField("b", IntegerType, false) ::
Nil), false) :: Nil)
+
+ val fieldA1 = GetStructField(
+ GetStructField(
+ AttributeReference("data1", structType, false)(expId, qualifier),
+ 0, Some("a1")),
+ 0, Some("b1"))
+ val fieldA2 = GetStructField(
+ GetStructField(
+ AttributeReference("data2", structType, false)(expId, qualifier),
+ 0, Some("a2")),
+ 0, Some("b2"))
+
+ // GetStructField with different names are semantically equal; thus,
`EqualTo(fieldA1, fieldA2)`
+ // will be optimized to `TrueLiteral` by `SimplifyBinaryComparison`.
+ val originalQuery = nonNullableRelation
+ .where(EqualTo(fieldA1, fieldA2))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery)
+ val correctAnswer = nonNullableRelation.analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 37a8815350a53..656da9fa01806 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2937,6 +2937,25 @@ class SQLQuerySuite extends QueryTest with
SharedSQLContext {
}
}
}
+
+ test("SPARK-26402: accessing nested fields with different cases in case
insensitive mode") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ val msg = intercept[AnalysisException] {
+ withTable("t") {
+ sql("create table t (s struct<i: Int>) using json")
+ checkAnswer(sql("select s.I from t group by s.i"), Nil)
+ }
+ }.message
+ assert(msg.contains("No such struct field I in i"))
+ }
+
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ withTable("t") {
+ sql("create table t (s struct<i: Int>) using json")
+ checkAnswer(sql("select s.I from t group by s.i"), Nil)
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org