This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 362a4d43159d [SPARK-46949][SQL] Support CHAR/VARCHAR through ResolveDefaultColumns 362a4d43159d is described below commit 362a4d43159d73ef2e4c4f9267b572046220680f Author: Kent Yao <y...@apache.org> AuthorDate: Fri Feb 2 17:39:12 2024 +0800 [SPARK-46949][SQL] Support CHAR/VARCHAR through ResolveDefaultColumns ### What changes were proposed in this pull request? We have issues resolving column definitions with default values, e.g., `c CHAR(5) DEFAULT 'foo'`, `v VARCHAR(10) DEFAULT 'bar'`. The reason is that CHAR/VARCHAR types in schemas are not supplanted with STRING type to align with the value expression. This PR fixes these issues in `ResolveDefaultColumns`, which seems to only cover the v1 tables. When I applied some related tests to v2 tables, they had the same issues. But beyond that, there is some other front-loading work that needs to be addressed. In this case, I'd like to separate v2 from the v1 PR. ### Why are the changes needed? bugfix ``` spark-sql (default)> CREATE TABLE t( c CHAR(5) DEFAULT 'spark') USING parquet; [INVALID_DEFAULT_VALUE.DATA_TYPE] Failed to execute CREATE TABLE command because the destination table column `c` has a DEFAULT value 'spark', which requires "CHAR(5)" type, but the statement provided a value of incompatible "STRING" type. ``` ### Does this PR introduce _any_ user-facing change? no, bugfix ### How was this patch tested? new tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #44991 from yaooqinn/SPARK-46949. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> --- .../catalyst/util/ResolveDefaultColumnsUtil.scala | 26 +++++++++----- .../spark/sql/ResolveDefaultColumnsSuite.scala | 41 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala index e782e017d1ef..da03de73557f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkThrowable, SparkUnsupportedOperationException} import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ @@ -42,7 +42,9 @@ import org.apache.spark.util.ArrayImplicits._ /** * This object contains fields to help process DEFAULT columns. */ -object ResolveDefaultColumns extends QueryErrorsBase with ResolveDefaultColumnsUtils { +object ResolveDefaultColumns extends QueryErrorsBase + with ResolveDefaultColumnsUtils + with SQLConfHelper { // Name of attributes representing explicit references to the value stored in the above // CURRENT_DEFAULT_COLUMN_METADATA. 
val CURRENT_DEFAULT_COLUMN_NAME = "DEFAULT" @@ -307,25 +309,26 @@ object ResolveDefaultColumns extends QueryErrorsBase with ResolveDefaultColumnsU statementType: String, colName: String, defaultSQL: String): Expression = { + val supplanted = CharVarcharUtils.replaceCharVarcharWithString(dataType) // Perform implicit coercion from the provided expression type to the required column type. - if (dataType == analyzed.dataType) { + val ret = if (supplanted == analyzed.dataType) { analyzed - } else if (Cast.canUpCast(analyzed.dataType, dataType)) { - Cast(analyzed, dataType) + } else if (Cast.canUpCast(analyzed.dataType, supplanted)) { + Cast(analyzed, supplanted) } else { // If the provided default value is a literal of a wider type than the target column, but the // literal value fits within the narrower type, just coerce it for convenience. Exclude // boolean/array/struct/map types from consideration for this type coercion to avoid // surprising behavior like interpreting "false" as integer zero. 
val result = if (analyzed.isInstanceOf[Literal] && - !Seq(dataType, analyzed.dataType).exists(_ match { + !Seq(supplanted, analyzed.dataType).exists(_ match { case _: BooleanType | _: ArrayType | _: StructType | _: MapType => true case _ => false })) { try { - val casted = Cast(analyzed, dataType, evalMode = EvalMode.TRY).eval() + val casted = Cast(analyzed, supplanted, evalMode = EvalMode.TRY).eval() if (casted != null) { - Some(Literal(casted, dataType)) + Some(Literal(casted, supplanted)) } else { None } @@ -339,6 +342,10 @@ object ResolveDefaultColumns extends QueryErrorsBase with ResolveDefaultColumnsU statementType, colName, defaultSQL, dataType, analyzed.dataType) } } + if (!conf.charVarcharAsString && CharVarcharUtils.hasCharVarchar(dataType)) { + CharVarcharUtils.stringLengthCheck(ret, dataType).eval(EmptyRow) + } + ret } /** @@ -454,7 +461,8 @@ object ResolveDefaultColumns extends QueryErrorsBase with ResolveDefaultColumnsU throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( statement, colName, default.originalSQL) } else if (default.resolved) { - if (!Cast.canUpCast(default.child.dataType, targetType)) { + val dataType = CharVarcharUtils.replaceCharVarcharWithString(targetType) + if (!Cast.canUpCast(default.child.dataType, dataType)) { throw QueryCompilationErrors.defaultValuesDataTypeError( statement, colName, default.originalSQL, targetType, default.child.dataType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala index 00529559a485..0b393f4b11b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.test.SharedSparkSession class ResolveDefaultColumnsSuite extends 
QueryTest with SharedSparkSession { @@ -215,4 +216,44 @@ class ResolveDefaultColumnsSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-46949: DDL with valid default char/varchar values") { + withTable("t") { + val ddl = + s""" + |CREATE TABLE t( + | key int, + | v VARCHAR(6) DEFAULT 'apache', + | c CHAR(5) DEFAULT 'spark') + |USING parquet""".stripMargin + sql(ddl) + sql("INSERT INTO t (key) VALUES(1)") + checkAnswer(sql("select * from t"), Row(1, "apache", "spark")) + } + } + + test("SPARK-46949: DDL with invalid default char/varchar values") { + Seq("CHAR", "VARCHAR").foreach { typeName => + checkError( + exception = intercept[SparkRuntimeException]( + sql(s"CREATE TABLE t(c $typeName(3) DEFAULT 'spark') USING parquet")), + errorClass = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "3")) + } + } + + test("SPARK-46949: DDL with default char/varchar values need padding") { + withTable("t") { + val ddl = + s""" + |CREATE TABLE t( + | key int, + | v VARCHAR(6) DEFAULT 'apache', + | c CHAR(6) DEFAULT 'spark') + |USING parquet""".stripMargin + sql(ddl) + sql("INSERT INTO t (key) VALUES(1)") + checkAnswer(sql("select * from t"), Row(1, "apache", "spark ")) + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org