This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 20469d4 [SPARK-28189][SQL] Use semanticEquals in Dataset drop method
for attributes comparison
20469d4 is described below
commit 20469d43eb22c832b4d8f30c9e611ef622c3366d
Author: Tony Zhang <[email protected]>
AuthorDate: Sat Jul 6 21:39:04 2019 -0700
[SPARK-28189][SQL] Use semanticEquals in Dataset drop method for attributes
comparison
## What changes were proposed in this pull request?
In the Dataset `drop(col: Column)` method, the `equals` comparison method was
used instead of `semanticEquals`, which caused abnormal case-sensitivity
behavior. When attributes of a LogicalPlan are checked for equality,
`semanticEquals` should be used instead.
A similar PR I referred to: https://github.com/apache/spark/pull/22713
created by mgaido91
## How was this patch tested?
- Added new unit test case in DataFrameSuite
- ./build/sbt "testOnly org.apache.spark.sql.*"
- The python code from ticket reporter at
https://issues.apache.org/jira/browse/SPARK-28189
Closes #25055 from Tonix517/SPARK-28189.
Authored-by: Tony Zhang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
.../org/apache/spark/sql/DataFrameSuite.scala | 23 ++++++++++++++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 147222c..ef03a09 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2322,7 +2322,7 @@ class Dataset[T] private[sql](
}
val attrs = this.logicalPlan.output
val colsAfterDrop = attrs.filter { attr =>
- attr != expression
+ !attr.semanticEquals(expression)
}.map(attr => Column(attr))
select(colsAfterDrop : _*)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index d15c1f4..9893670 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -572,6 +572,29 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df.schema.map(_.name) === Seq("value"))
}
+ test("SPARK-28189 drop column using drop with column reference with case-insensitive names") {
+ // With SQL config caseSensitive OFF, case insensitive column name should work
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ val col1 = testData("KEY")
+ val df1 = testData.drop(col1)
+ checkAnswer(df1, testData.selectExpr("value"))
+ assert(df1.schema.map(_.name) === Seq("value"))
+
+ val col2 = testData("Key")
+ val df2 = testData.drop(col2)
+ checkAnswer(df2, testData.selectExpr("value"))
+ assert(df2.schema.map(_.name) === Seq("value"))
+ }
+
+ // With SQL config caseSensitive ON, AnalysisException should be thrown
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ val e = intercept[AnalysisException] {
+ testData("KEY")
+ }.getMessage
+ assert(e.contains("Cannot resolve column name"))
+ }
+ }
+
test("drop unknown column (no-op) with column reference") {
val col = Column("random")
val df = testData.drop(col)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]