This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e9b12deff41 [SPARK-42123][SQL] Include column default values in
DESCRIBE and SHOW CREATE TABLE output
e9b12deff41 is described below
commit e9b12deff41066dee28a7cdf971a3e7ef034d5f2
Author: Daniel Tenedorio <[email protected]>
AuthorDate: Wed Jan 25 14:35:29 2023 -0800
[SPARK-42123][SQL] Include column default values in DESCRIBE and SHOW
CREATE TABLE output
### What changes were proposed in this pull request?
Include column default values in DESCRIBE and SHOW CREATE TABLE output.
### Why are the changes needed?
This helps users work with tables and check their properties before
querying or modifying them.
### Does this PR introduce _any_ user-facing change?
Yes, it changes the DESCRIBE and SHOW CREATE TABLE command outputs when the
table has one or more columns with default values.
### How was this patch tested?
This PR includes new unit and file-based query test coverage.
Closes #39726 from dtenedor/show-create-table-and-describe-defaults.
Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
---
.../org/apache/spark/sql/types/StructField.scala | 6 +-
.../spark/sql/execution/command/tables.scala | 17 +-
.../test/resources/sql-tests/inputs/describe.sql | 22 +++
.../sql-tests/inputs/show-create-table.sql | 8 +
.../resources/sql-tests/results/describe.sql.out | 192 +++++++++++++++++++++
.../sql-tests/results/show-create-table.sql.out | 30 ++++
.../execution/command/v1/DescribeTableSuite.scala | 49 ++++++
.../command/v1/ShowCreateTableSuite.scala | 24 +++
8 files changed, 346 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
index aa038d05679..432e06a28a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -141,6 +141,10 @@ case class StructField(
}
}
+ private def getDDLDefault = getCurrentDefaultValue()
+ .map(" DEFAULT " + _)
+ .getOrElse("")
+
private def getDDLComment = getComment()
.map(escapeSingleQuotedString)
.map(" COMMENT '" + _ + "'")
@@ -160,7 +164,7 @@ case class StructField(
*/
def toDDL: String = {
val nullString = if (nullable) "" else " NOT NULL"
- s"${quoteIfNeeded(name)} ${dataType.sql}${nullString}$getDDLComment"
+ s"${quoteIfNeeded(name)}
${dataType.sql}${nullString}$getDDLDefault$getDDLComment"
}
private[sql] def toAttribute: AttributeReference =
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index b47f0e376ac..30f77b11ec0 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString,
quoteIfNeeded, CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils,
ResolveDefaultColumns}
+import
org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY
import
org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
import org.apache.spark.sql.execution.datasources.DataSource
@@ -287,7 +288,7 @@ case class AlterTableAddColumnsCommand(
private def constantFoldCurrentDefaultsToExistDefaults(
sparkSession: SparkSession, tableProvider: Option[String]):
Seq[StructField] = {
colsToAdd.map { col: StructField =>
- if
(col.metadata.contains(ResolveDefaultColumns.CURRENT_DEFAULT_COLUMN_METADATA_KEY))
{
+ if (col.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)) {
val foldedStructType =
ResolveDefaultColumns.constantFoldCurrentDefaultsToExistDefaults(
StructType(Array(col)), tableProvider, "ALTER TABLE ADD COLUMNS",
true)
foldedStructType.fields(0)
@@ -645,6 +646,16 @@ case class DescribeTableCommand(
} else if (isExtended) {
describeFormattedTableInfo(metadata, result)
}
+
+ // If any columns have default values, append them to the result.
+ if
(metadata.schema.fields.exists(_.metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)))
{
+ append(result, "", "", "")
+ append(result, "# Column Default Values", "", "")
+ metadata.schema.foreach { column =>
+ column.getCurrentDefaultValue().map(
+ append(result, column.name, column.dataType.simpleString, _))
+ }
+ }
}
result.toSeq
@@ -807,6 +818,10 @@ case class DescribeColumnCommand(
hist <- c.histogram
} yield histogramDescription(hist)
buffer ++= histDesc.getOrElse(Seq(Row("histogram", "NULL")))
+ val defaultKey = CURRENT_DEFAULT_COLUMN_METADATA_KEY
+ if (field.metadata.contains(defaultKey)) {
+ buffer += Row("default", field.metadata.getString(defaultKey))
+ }
}
buffer.toSeq
}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index 493ea68baed..b37931456d0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -97,3 +97,25 @@ DROP VIEW temp_v;
DROP VIEW temp_Data_Source_View;
DROP VIEW v;
+
+-- Show column default values
+CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING
parquet COMMENT 'table_comment';
+
+DESC d;
+
+DESC EXTENDED d;
+
+DESC TABLE EXTENDED d;
+
+DESC FORMATTED d;
+
+-- Show column default values with newlines in the string
+CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT
42) USING parquet COMMENT 'table_comment';
+
+DESC e;
+
+DESC EXTENDED e;
+
+DESC TABLE EXTENDED e;
+
+DESC FORMATTED e;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
index 98bc3c5cae0..5192d2dc6b5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql
@@ -45,6 +45,14 @@ SHOW CREATE TABLE tbl;
DROP TABLE tbl;
+-- default column values
+CREATE TABLE tbl (a INT DEFAULT 42, b STRING DEFAULT 'abc, def', c INT DEFAULT
42) USING parquet
+COMMENT 'This is a comment';
+
+SHOW CREATE TABLE tbl;
+DROP TABLE tbl;
+
+
-- comment
CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
COMMENT 'This is a comment';
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 6193e1222a6..6c55bf40c9e 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -672,3 +672,195 @@ DROP VIEW v
struct<>
-- !query output
+
+
+-- !query
+CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING
parquet COMMENT 'table_comment'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC d
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Column Default Values
+a string 'default-value'
+b int 42
+
+
+-- !query
+DESC EXTENDED d
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table d
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/d
+
+# Column Default Values
+a string 'default-value'
+b int 42
+
+
+-- !query
+DESC TABLE EXTENDED d
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table d
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/d
+
+# Column Default Values
+a string 'default-value'
+b int 42
+
+
+-- !query
+DESC FORMATTED d
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table d
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/d
+
+# Column Default Values
+a string 'default-value'
+b int 42
+
+
+-- !query
+CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT
42) USING parquet COMMENT 'table_comment'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC e
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Column Default Values
+a string CONCAT('a\n b\n ', 'c\n d')
+b int 42
+
+
+-- !query
+DESC EXTENDED e
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table e
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/e
+
+# Column Default Values
+a string CONCAT('a\n b\n ', 'c\n d')
+b int 42
+
+
+-- !query
+DESC TABLE EXTENDED e
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table e
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/e
+
+# Column Default Values
+a string CONCAT('a\n b\n ', 'c\n d')
+b int 42
+
+
+-- !query
+DESC FORMATTED e
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+a string
+b int
+
+# Detailed Table Information
+Catalog spark_catalog
+Database default
+Table e
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+Created By [not included in comparison]
+Type MANAGED
+Provider parquet
+Comment table_comment
+Location [not included in comparison]/{warehouse_dir}/e
+
+# Column Default Values
+a string CONCAT('a\n b\n ', 'c\n d')
+b int 42
diff --git
a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
index 08c13ccc7e0..0d73960a6b3 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out
@@ -181,6 +181,36 @@ struct<>
+-- !query
+CREATE TABLE tbl (a INT DEFAULT 42, b STRING DEFAULT 'abc, def', c INT DEFAULT
42) USING parquet
+COMMENT 'This is a comment'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SHOW CREATE TABLE tbl
+-- !query schema
+struct<createtab_stmt:string>
+-- !query output
+CREATE TABLE default.tbl (
+ a INT DEFAULT 42,
+ b STRING DEFAULT 'abc, def',
+ c INT DEFAULT 42)
+USING parquet
+COMMENT 'This is a comment'
+
+
+-- !query
+DROP TABLE tbl
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet
COMMENT 'This is a comment'
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
index 84da38f5097..02cf1958b94 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -182,6 +182,27 @@ trait DescribeTableSuiteBase extends
command.DescribeTableSuiteBase
}
}
}
+
+ test("describe a column with a default value") {
+ withTable("t") {
+ sql(s"create table t(a int default 42) $defaultUsing")
+ val descriptionDf = sql("describe table extended t a")
+ QueryTest.checkAnswer(
+ descriptionDf,
+ Seq(
+ Row("col_name", "a"),
+ Row("data_type", "int"),
+ Row("comment", "NULL"),
+ Row("default", "42"),
+ Row("min", "NULL"),
+ Row("max", "NULL"),
+ Row("num_nulls", "NULL"),
+ Row("distinct_count", "NULL"),
+ Row("max_col_len", "NULL"),
+ Row("avg_col_len", "NULL"),
+ Row("histogram", "NULL")))
+ }
+ }
}
/**
@@ -225,4 +246,32 @@ class DescribeTableSuite extends DescribeTableSuiteBase
with CommandSuiteBase {
Row("Partition Provider", "Catalog", "")))
}
}
+
+ test("DESCRIBE TABLE EXTENDED of a table with a default column value") {
+ withTable("t") {
+ spark.sql(s"CREATE TABLE t (id bigint default 42) $defaultUsing")
+ val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED t")
+ assert(descriptionDf.schema.map { field =>
+ (field.name, field.dataType)
+ } === Seq(
+ ("col_name", StringType),
+ ("data_type", StringType),
+ ("comment", StringType)))
+ QueryTest.checkAnswer(
+ descriptionDf.filter(
+ "!(col_name in ('Created Time', 'Created By', 'Database',
'Location', " +
+ "'Provider', 'Type'))"),
+ Seq(
+ Row("id", "bigint", null),
+ Row("", "", ""),
+ Row("# Detailed Table Information", "", ""),
+ Row("Catalog", SESSION_CATALOG_NAME, ""),
+ Row("Table", "t", ""),
+ Row("Last Access", "UNKNOWN", ""),
+ Row("", "", ""),
+ Row("# Column Default Values", "", ""),
+ Row("id", "bigint", "42")
+ ))
+ }
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala
index ee8aa424d5c..62e8f53d765 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowCreateTableSuite.scala
@@ -161,6 +161,30 @@ trait ShowCreateTableSuiteBase extends
command.ShowCreateTableSuiteBase
assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS
SERDE` instead"))
}
}
+
+ test("show create table with default column values") {
+ withNamespaceAndTable(ns, table) { t =>
+ sql(
+ s"""
+ |CREATE TABLE $t (
+ | a bigint NOT NULL,
+ | b bigint DEFAULT 42,
+ | c string DEFAULT 'abc, "def"' COMMENT 'comment'
+ |)
+ |USING parquet
+ |COMMENT 'This is a comment'
+ """.stripMargin)
+ val showDDL = getShowCreateDDL(t)
+ assert(showDDL === Array(
+ s"CREATE TABLE $fullName (",
+ "a BIGINT,",
+ "b BIGINT DEFAULT 42,",
+ "c STRING DEFAULT 'abc, \"def\"' COMMENT 'comment')",
+ "USING parquet",
+ "COMMENT 'This is a comment'"
+ ))
+ }
+ }
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]