This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 35ec300eaea [SPARK-37643][SQL] when charVarcharAsString is true, for
char datatype predicate query should skip rpadding rule
35ec300eaea is described below
commit 35ec300eaeabf7f6e0827f8dc9e7923969e68f00
Author: fhygh <[email protected]>
AuthorDate: Mon Apr 18 23:11:32 2022 +0800
[SPARK-37643][SQL] when charVarcharAsString is true, for char datatype
predicate query should skip rpadding rule
### What changes were proposed in this pull request?
After the ApplyCharTypePadding rule was added, when the column in a query
predicate has the char data type and the column value is shorter than the
defined length, the value is right-padded and the query returns incorrect results.
### Why are the changes needed?
To fix incorrect query results when the predicate column's data type is char.
In this case, when charVarcharAsString is true, we should skip the rpadding rule.
### Does this PR introduce _any_ user-facing change?
Before this fix, if a query used a char-typed column in a predicate, users
had to be careful about setting charVarcharAsString to true.
### How was this patch tested?
Added a new unit test.
Closes #36187 from fhygh/charpredicatequery.
Authored-by: fhygh <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit c1ea8b446d00dd0123a0fad93a3e143933419a76)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 +++
.../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 13 +++++++++++++
2 files changed, 16 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 21233e9801e..d7bf9f2571c 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -4179,6 +4179,9 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] {
}
override def apply(plan: LogicalPlan): LogicalPlan = {
+ if (SQLConf.get.charVarcharAsString) {
+ return plan
+ }
plan.resolveOperatorsUpWithPruning(_.containsAnyPattern(BINARY_COMPARISON,
IN)) {
case operator => operator.transformExpressionsUpWithPruning(
_.containsAnyPattern(BINARY_COMPARISON, IN)) {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
index 7be54d49a90..88041fc26a0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
@@ -100,6 +100,19 @@ trait CharVarcharTestSuite extends QueryTest with
SQLTestUtils {
}
}
+ test("char type values should not be padded when charVarcharAsString is
true") {
+ withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> "true") {
+ withTable("t") {
+ sql(s"CREATE TABLE t(a STRING, b CHAR(5), c CHAR(5)) USING $format
partitioned by (c)")
+ sql("INSERT INTO t VALUES ('abc', 'abc', 'abc')")
+ checkAnswer(sql("SELECT b FROM t WHERE b='abc'"), Row("abc"))
+ checkAnswer(sql("SELECT b FROM t WHERE b in ('abc')"), Row("abc"))
+ checkAnswer(sql("SELECT c FROM t WHERE c='abc'"), Row("abc"))
+ checkAnswer(sql("SELECT c FROM t WHERE c in ('abc')"), Row("abc"))
+ }
+ }
+ }
+
test("varchar type values length check and trim: partitioned columns") {
(0 to 5).foreach { n =>
// SPARK-34192: we need to create a a new table for each round of test
because of
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]