yaooqinn commented on a change in pull request #31281:
URL: https://github.com/apache/spark/pull/31281#discussion_r563667073
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
##########
@@ -37,31 +37,134 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
assert(CharVarcharUtils.getRawType(f.metadata) == Some(dt))
}
- test("char type values should be padded: top-level columns") {
+ test("char type values should be padded or trimmed: top-level columns") {
withTable("t") {
sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format")
- sql("INSERT INTO t VALUES ('1', 'a')")
- checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
- checkColType(spark.table("t").schema(1), CharType(5))
+ (0 to 5).map(n => "a" + " " * n).foreach { v =>
+ sql(s"INSERT OVERWRITE t VALUES ('1', '$v')")
+ checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
+ checkColType(spark.table("t").schema(1), CharType(5))
+ }
+
+ sql("INSERT OVERWRITE t VALUES ('1', null)")
+ checkAnswer(spark.table("t"), Row("1", null))
+ }
+ }
+
+ test("char type values should be padded or trimmed: partitioned columns") {
+ withTable("t") {
+ sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY
(c)")
+ (0 to 5).map(n => "a" + " " * n).foreach { v =>
+ sql(s"INSERT OVERWRITE t VALUES ('1', '$v')")
+ checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
+ checkColType(spark.table("t").schema(1), CharType(5))
+ }
+ }
+
+ withTable("t") {
+ sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY
(c)")
+ (0 to 5).map(n => "a" + " " * n).foreach { v =>
+ sql(s"INSERT INTO t VALUES ('1', '$v')")
+ checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
+ sql(s"ALTER TABLE t DROP PARTITION(c='$v')")
+ checkAnswer(spark.table("t"), Nil)
+ }
sql("INSERT OVERWRITE t VALUES ('1', null)")
checkAnswer(spark.table("t"), Row("1", null))
}
+
+ withTable("t") {
+ sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY
(c)")
+ (0 to 5).map(n => "a" + " " * n).foreach { v =>
+ sql(s"INSERT INTO t VALUES ('1', '$v')")
+ sql(s"ALTER TABLE t DROP PARTITION(c='a')")
+ checkAnswer(spark.table("t"), Nil)
+ }
+ }
}
- test("char type values should be padded: partitioned columns") {
+ test("char type values should be padded or trimmed: static partitioned
columns") {
withTable("t") {
sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY
(c)")
- sql("INSERT INTO t VALUES ('1', 'a')")
- checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
- checkColType(spark.table("t").schema(1), CharType(5))
+ (0 to 5).map(n => "a" + " " * n).foreach { v =>
+ sql(s"INSERT INTO t PARTITION (c ='$v') VALUES ('1')")
+ checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
+ checkColType(spark.table("t").schema(1), CharType(5))
+ sql(s"ALTER TABLE t DROP PARTITION(c='$v')")
Review comment:
According to the SQL standard, trailing spaces within the length limit are valid for the varchar type, so this case should remain as it is.
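
For illustration, here is a minimal sketch (hypothetical, not code from this PR) of the behavior I mean, reusing the suite's existing helpers and assuming `VarcharType` is in scope: a VARCHAR(5) column keeps a trailing space that fits within the declared length, unlike CHAR(5), which pads to the full length.

```scala
  // Hypothetical test, not part of this PR: trailing spaces that fit within
  // the declared VARCHAR length should be preserved rather than trimmed.
  test("varchar type values keep trailing spaces within the length limit") {
    withTable("t") {
      sql(s"CREATE TABLE t(i STRING, c VARCHAR(5)) USING $format")
      // 'a ' is 2 characters, within the 5-character limit,
      // so the stored value keeps its trailing space.
      sql("INSERT INTO t VALUES ('1', 'a ')")
      checkAnswer(spark.table("t"), Row("1", "a "))
      checkColType(spark.table("t").schema(1), VarcharType(5))
    }
  }
```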