This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 36d23eff4b4c [SPARK-50541] Describe Table As JSON
36d23eff4b4c is described below
commit 36d23eff4b4c3a2b8fd301672e532132c96fdd68
Author: Amanda Liu <[email protected]>
AuthorDate: Tue Jan 7 16:23:38 2025 +0800
[SPARK-50541] Describe Table As JSON
### What changes were proposed in this pull request?
Support `DESCRIBE TABLE ... [AS JSON]` to optionally display table
metadata in JSON format.
**SQL Ref Spec:**
{ DESC | DESCRIBE } [ TABLE ] [ EXTENDED | FORMATTED ] table_name { [
PARTITION clause ] | [ column_name ] } **[ AS JSON ]**
Output:
json_metadata: String
### Why are the changes needed?
The Spark SQL command `DESCRIBE TABLE` displays table metadata in a
DataFrame format geared toward human consumption. This format causes parsing
challenges, e.g. if fields contain special characters or the format changes as
new features are added.
The new `AS JSON` option returns the table metadata as a JSON string
that is easy to parse programmatically and can be extended with minimal risk
of breaking changes. It is not meant to be human-readable.
### Does this PR introduce _any_ user-facing change?
Yes, this provides a new option to display DESCRIBE TABLE metadata in JSON
format. See below (and updated golden files) for the JSON output schema:
```
{
"table_name": "<table_name>",
"catalog_name": "<catalog_name>",
"schema_name": "<innermost_schema_name>",
"namespace": ["<innermost_schema_name>"],
"type": "<table_type>",
"provider": "<provider>",
"columns": [
{
"name": "<name>",
"type": <type_json>,
"comment": "<comment>",
"nullable": <boolean>,
"default": "<default_val>"
}
],
"partition_values": {
"<col_name>": "<val>"
},
"location": "<path>",
"view_text": "<view_text>",
"view_original_text": "<view_original_text>",
"view_schema_mode": "<view_schema_mode>",
"view_catalog_and_namespace": "<view_catalog_and_namespace>",
"view_query_output_columns": ["col1", "col2"],
"owner": "<owner>",
"comment": "<comment>",
"table_properties": {
"property1": "<property1>",
"property2": "<property2>"
},
"storage_properties": {
"property1": "<property1>",
"property2": "<property2>"
},
"serde_library": "<serde_library>",
"input_format": "<input_format>",
"output_format": "<output_format>",
"num_buckets": <num_buckets>,
"bucket_columns": ["<col_name>"],
"sort_columns": ["<col_name>"],
"created_time": "<timestamp_ISO-8601>",
"last_access": "<timestamp_ISO-8601>",
"partition_provider": "<partition_provider>"
}
```
### How was this patch tested?
- Updated golden files for `describe.sql`
- Added tests in `DescribeTableParserSuite.scala`,
`DescribeTableSuite.scala`, `PlanResolutionSuite.scala`
### Was this patch authored or co-authored using generative AI tooling?
Closes #49139 from asl3/asl3/describetableasjson.
Authored-by: Amanda Liu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 12 +
docs/sql-ref-ansi-compliance.md | 1 +
docs/sql-ref-syntax-aux-describe-table.md | 99 +++-
.../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 +
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 4 +-
.../spark/sql/errors/CompilationErrors.scala | 12 +
.../spark/sql/catalyst/catalog/interface.scala | 235 ++++++---
.../spark/sql/catalyst/parser/AstBuilder.scala | 13 +-
.../sql/catalyst/plans/DescribeCommandSchema.scala | 20 +-
.../sql/catalyst/plans/logical/v2Commands.scala | 13 +
.../spark/sql/errors/QueryCompilationErrors.scala | 4 +
.../catalyst/analysis/ResolveSessionCatalog.scala | 8 +-
.../spark/sql/execution/command/tables.scala | 259 ++++++++++
.../datasources/v2/DataSourceV2Strategy.scala | 3 +
.../sql-tests/analyzer-results/describe.sql.out | 41 ++
.../test/resources/sql-tests/inputs/describe.sql | 12 +
.../resources/sql-tests/results/describe.sql.out | 53 ++
.../sql-tests/results/keywords-enforced.sql.out | 1 +
.../resources/sql-tests/results/keywords.sql.out | 1 +
.../sql-tests/results/nonansi/keywords.sql.out | 1 +
.../sql-tests/results/show-tables.sql.out | 2 -
.../org/apache/spark/sql/SQLQueryTestHelper.scala | 13 +-
.../command/DescribeTableParserSuite.scala | 7 +
.../execution/command/PlanResolutionSuite.scala | 39 +-
.../execution/command/v1/DescribeTableSuite.scala | 558 +++++++++++++++++++++
.../ThriftServerWithSparkContextSuite.scala | 2 +-
26 files changed, 1313 insertions(+), 101 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 52c0315bd073..97c8f059bcde 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -1155,6 +1155,13 @@
],
"sqlState" : "42623"
},
+ "DESCRIBE_JSON_NOT_EXTENDED" : {
+ "message" : [
+ "DESCRIBE TABLE ... AS JSON only supported when [EXTENDED|FORMATTED] is
specified.",
+ "For example: DESCRIBE EXTENDED <tableName> AS JSON is supported but
DESCRIBE <tableName> AS JSON is not."
+ ],
+ "sqlState" : "0A000"
+ },
"DISTINCT_WINDOW_FUNCTION_UNSUPPORTED" : {
"message" : [
"Distinct window functions are not supported: <windowExpr>."
@@ -5283,6 +5290,11 @@
"Attach a comment to the namespace <namespace>."
]
},
+ "DESC_TABLE_COLUMN_JSON" : {
+ "message" : [
+ "DESC TABLE COLUMN AS JSON not supported for individual columns."
+ ]
+ },
"DESC_TABLE_COLUMN_PARTITION" : {
"message" : [
"DESC TABLE COLUMN for a specific partition."
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 50cdcd6d0979..3b1138b9ee0e 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -568,6 +568,7 @@ Below is a list of all the keywords in Spark SQL.
|ITEMS|non-reserved|non-reserved|non-reserved|
|ITERATE|non-reserved|non-reserved|non-reserved|
|JOIN|reserved|strict-non-reserved|reserved|
+|JSON|non-reserved|non-reserved|non-reserved|
|KEYS|non-reserved|non-reserved|non-reserved|
|LANGUAGE|non-reserved|non-reserved|reserved|
|LAST|non-reserved|non-reserved|non-reserved|
diff --git a/docs/sql-ref-syntax-aux-describe-table.md
b/docs/sql-ref-syntax-aux-describe-table.md
index 4b6e1e8c3461..6a14da1e4380 100644
--- a/docs/sql-ref-syntax-aux-describe-table.md
+++ b/docs/sql-ref-syntax-aux-describe-table.md
@@ -29,16 +29,17 @@ to return the metadata pertaining to a partition or column
respectively.
### Syntax
```sql
-{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [
col_name ]
+{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [
col_name ] [ AS JSON ]
```
### Parameters
* **format**
- Specifies the optional format of describe output. If `EXTENDED` is
specified
+ Specifies the optional format of describe output. If `EXTENDED` or
`FORMATTED` is specified
then additional metadata information (such as parent database, owner, and
access time)
- is returned.
+ is returned. Also if `EXTENDED` or `FORMATTED` is specified, then the
metadata can be returned
+ in JSON format by specifying `AS JSON` at the end of the statement.
* **table_identifier**
@@ -60,8 +61,96 @@ to return the metadata pertaining to a partition or column
respectively.
and `col_name` are mutually exclusive and can not be specified together.
Currently
nested columns are not allowed to be specified.
+ JSON format is not currently supported for individual columns.
+
**Syntax:** `[ database_name. ] [ table_name. ] column_name`
+* **AS JSON**
+
+ An optional parameter to return the table metadata in JSON format. Only
supported when `EXTENDED`
+ or `FORMATTED` format is specified (both produce equivalent JSON).
+
+ **Syntax:** `[ AS JSON ]`
+
+ **Schema:**
+
+ Below is the full JSON schema.
+ In actual output, null fields are omitted and the JSON is not pretty-printed
(see Examples).
+
+ ```json
+ {
+ "table_name": "<table_name>",
+ "catalog_name": "<catalog_name>",
+ "schema_name": "<innermost_namespace_name>",
+ "namespace": ["<namespace_names>"],
+ "type": "<table_type>",
+ "provider": "<provider>",
+ "columns": [
+ {
+ "name": "<name>",
+ "type": <type_json>,
+ "comment": "<comment>",
+ "nullable": <boolean>,
+ "default": "<default_val>"
+ }
+ ],
+ "partition_values": {
+ "<col_name>": "<val>"
+ },
+ "location": "<path>",
+ "view_text": "<view_text>",
+ "view_original_text": "<view_original_text>",
+ "view_schema_mode": "<view_schema_mode>",
+ "view_catalog_and_namespace": "<view_catalog_and_namespace>",
+ "view_query_output_columns": ["col1", "col2"],
+ "comment": "<comment>",
+ "table_properties": {
+ "property1": "<property1>",
+ "property2": "<property2>"
+ },
+ "storage_properties": {
+ "property1": "<property1>",
+ "property2": "<property2>"
+ },
+ "serde_library": "<serde_library>",
+ "input_format": "<input_format>",
+ "output_format": "<output_format>",
+ "num_buckets": <num_buckets>,
+ "bucket_columns": ["<col_name>"],
+ "sort_columns": ["<col_name>"],
+ "created_time": "<timestamp_ISO-8601>",
+ "created_by": "<created_by>",
+ "last_access": "<timestamp_ISO-8601>",
+ "partition_provider": "<partition_provider>"
+ }
+ ```
+
+ Below are the schema definitions for `<type_json>`:
+
+| Spark SQL Data Types | JSON Representation
|
+|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ByteType | `{ "name" : "tinyint" }`
|
+| ShortType | `{ "name" : "smallint" }`
|
+| IntegerType | `{ "name" : "int" }`
|
+| LongType | `{ "name" : "bigint" }`
|
+| FloatType | `{ "name" : "float" }`
|
+| DoubleType | `{ "name" : "double" }`
|
+| DecimalType | `{ "name" : "decimal", "precision": p, "scale": s }`
|
+| StringType | `{ "name" : "string" }`
|
+| VarCharType | `{ "name" : "varchar", "length": n }`
|
+| CharType | `{ "name" : "char", "length": n }`
|
+| BinaryType | `{ "name" : "binary" }`
|
+| BooleanType | `{ "name" : "boolean" }`
|
+| DateType | `{ "name" : "date" }`
|
+| VariantType | `{ "name" : "variant" }`
|
+| TimestampType | `{ "name" : "timestamp_ltz" }`
|
+| TimestampNTZType | `{ "name" : "timestamp_ntz" }`
|
+| YearMonthIntervalType | `{ "name" : "interval", "start_unit":
"<start_unit>", "end_unit": "<end_unit>" }`
|
+| DayTimeIntervalType | `{ "name" : "interval", "start_unit":
"<start_unit>", "end_unit": "<end_unit>" }`
|
+| ArrayType | `{ "name" : "array", "element_type": <type_json>,
"element_nullable": <boolean> }`
|
+| MapType | `{ "name" : "map", "key_type": <type_json>,
"value_type": <type_json>, "value_nullable": <boolean> }`
|
+| StructType | `{ "name" : "struct", "fields": [ {"name" :
"field1", "type" : <type_json>, "nullable": <boolean>, "comment": "<comment>",
"default": "<default_val>"}, ... ] }` |
+
### Examples
```sql
@@ -173,6 +262,10 @@ DESCRIBE customer salesdb.customer.name;
|data_type| string|
| comment|Short name|
+---------+----------+
+
+-- Returns the table metadata in JSON format.
+DESC FORMATTED customer AS JSON;
+{"table_name":"customer","catalog_name":"spark_catalog","schema_name":"default","namespace":["default"],"columns":[{"name":"cust_id","type":{"name":"int"},"nullable":true},{"name":"name","type":{"name":"string"},"comment":"Short
name","nullable":true},{"name":"state","type":{"name":"varchar","length":20},"nullable":true}],"location":
"file:/tmp/salesdb.db/custom...","created_time":"2020-04-07T14:05:43Z","last_access":"UNKNOWN","created_by":"None","type":"MANAGED","provider":"parquet"
[...]
```
### Related Statements
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index 91a267364216..dafeed48aef1 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -283,6 +283,7 @@ IS: 'IS';
ITEMS: 'ITEMS';
ITERATE: 'ITERATE';
JOIN: 'JOIN';
+JSON: 'JSON';
KEYS: 'KEYS';
LANGUAGE: 'LANGUAGE';
LAST: 'LAST';
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 3ca120da98dd..667d200268cf 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -287,7 +287,7 @@ statement
| (DESC | DESCRIBE) namespace EXTENDED?
identifierReference
#describeNamespace
| (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
- identifierReference partitionSpec? describeColName?
#describeRelation
+ identifierReference partitionSpec? describeColName? (AS JSON)?
#describeRelation
| (DESC | DESCRIBE) QUERY? query
#describeQuery
| COMMENT ON namespace identifierReference IS
comment
#commentNamespace
@@ -1680,6 +1680,7 @@ ansiNonReserved
| INVOKER
| ITEMS
| ITERATE
+ | JSON
| KEYS
| LANGUAGE
| LAST
@@ -2039,6 +2040,7 @@ nonReserved
| IS
| ITEMS
| ITERATE
+ | JSON
| KEYS
| LANGUAGE
| LAST
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala
b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala
index 3e63b8281f73..617cab4b2a39 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala
@@ -41,6 +41,18 @@ private[sql] trait CompilationErrors extends
DataTypeErrorsBase {
cause = Option(cause))
}
+ def describeJsonNotExtendedError(tableName: String): AnalysisException = {
+ new AnalysisException(
+ errorClass = "DESCRIBE_JSON_NOT_EXTENDED",
+ messageParameters = Map("tableName" -> tableName))
+ }
+
+ def describeColJsonUnsupportedError(): AnalysisException = {
+ new AnalysisException(
+ errorClass = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON",
+ messageParameters = Map.empty)
+ }
+
def cannotFindDescriptorFileError(filePath: String, cause: Throwable):
AnalysisException = {
new AnalysisException(
errorClass = "PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND",
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index e199452a2da6..858e2cf25b6f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -28,7 +28,7 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.{ClassTagExtensions,
DefaultScalaModule}
import org.apache.commons.lang3.StringUtils
-import org.json4s.JsonAST.{JArray, JString}
+import org.json4s.JsonAST.{JArray, JBool, JDouble, JInt, JNull, JObject,
JString, JValue}
import org.json4s.jackson.JsonMethods._
import org.apache.spark.SparkException
@@ -51,6 +51,48 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils}
import org.apache.spark.util.ArrayImplicits._
+/**
+ * Interface providing util to convert JValue to String representation of
catalog entities.
+ */
+trait MetadataMapSupport {
+ def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue]
+
+ def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
+ jsonToString(toJsonLinkedHashMap)
+ }
+
+ protected def jsonToString(
+ jsonMap: mutable.LinkedHashMap[String, JValue]):
mutable.LinkedHashMap[String, String] = {
+ def removeWhitespace(str: String): String = {
+ str.replaceAll("\\s+$", "")
+ }
+
+ val map = new mutable.LinkedHashMap[String, String]()
+ jsonMap.foreach { case (key, jValue) =>
+ val stringValue = jValue match {
+ case JString(value) => removeWhitespace(value)
+ case JArray(values) =>
+ values.map(_.values)
+ .map {
+ case str: String => quoteIdentifier(removeWhitespace(str))
+ case other => removeWhitespace(other.toString)
+ }
+ .mkString("[", ", ", "]")
+ case JObject(fields) =>
+ fields.map { case (k, v) =>
+ s"$k=${removeWhitespace(v.values.toString)}"
+ }
+ .mkString("[", ", ", "]")
+ case JInt(value) => value.toString
+ case JDouble(value) => value.toString
+ case _ => removeWhitespace(jValue.values.toString)
+ }
+ map.put(key, stringValue)
+ }
+ map
+ }
+}
+
/**
* A function defined in the catalog.
@@ -74,25 +116,31 @@ case class CatalogStorageFormat(
outputFormat: Option[String],
serde: Option[String],
compressed: Boolean,
- properties: Map[String, String]) {
+ properties: Map[String, String]) extends MetadataMapSupport {
override def toString: String = {
- toLinkedHashMap.map { case ((key, value)) =>
+ toLinkedHashMap.map { case (key, value) =>
if (value.isEmpty) key else s"$key: $value"
}.mkString("Storage(", ", ", ")")
}
- def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
- val map = new mutable.LinkedHashMap[String, String]()
- locationUri.foreach(l => map.put("Location", l.toString))
- serde.foreach(map.put("Serde Library", _))
- inputFormat.foreach(map.put("InputFormat", _))
- outputFormat.foreach(map.put("OutputFormat", _))
- if (compressed) map.put("Compressed", "")
+ def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
+ val map = mutable.LinkedHashMap[String, JValue]()
+
+ locationUri.foreach(l => map += ("Location" -> JString(l.toString)))
+ serde.foreach(s => map += ("Serde Library" -> JString(s)))
+ inputFormat.foreach(format => map += ("InputFormat" -> JString(format)))
+ outputFormat.foreach(format => map += ("OutputFormat" -> JString(format)))
+
+ if (compressed) map += ("Compressed" -> JBool(true))
+
SQLConf.get.redactOptions(properties) match {
case props if props.isEmpty => // No-op
case props =>
- map.put("Storage Properties", props.map(p => p._1 + "=" +
p._2).mkString("[", ", ", "]"))
+ val storagePropsJson = JObject(
+ props.map { case (k, v) => k -> JString(v) }.toList
+ )
+ map += ("Storage Properties" -> storagePropsJson)
}
map
}
@@ -120,35 +168,44 @@ case class CatalogTablePartition(
parameters: Map[String, String] = Map.empty,
createTime: Long = System.currentTimeMillis,
lastAccessTime: Long = -1,
- stats: Option[CatalogStatistics] = None) {
+ stats: Option[CatalogStatistics] = None) extends MetadataMapSupport {
+ def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
+ val map = mutable.LinkedHashMap[String, JValue]()
- def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
- val map = new mutable.LinkedHashMap[String, String]()
- val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ")
- map.put("Partition Values", s"[$specString]")
- map ++= storage.toLinkedHashMap
- if (parameters.nonEmpty) {
- map.put("Partition Parameters", s"{" +
- s"${SQLConf.get.redactOptions(parameters).map(p => p._1 + "=" +
p._2).mkString(", ")}}")
+ val specJson = JObject(spec.map { case (k, v) => k -> JString(v) }.toList)
+ map += ("Partition Values" -> specJson)
+
+ storage.toJsonLinkedHashMap.foreach { case (k, v) =>
+ map += (k -> v)
}
- map.put("Created Time", new Date(createTime).toString)
- val lastAccess = {
- if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
+
+ if (parameters.nonEmpty) {
+ val paramsJson = JObject(SQLConf.get.redactOptions(parameters).map {
+ case (k, v) => k -> JString(v)
+ }.toList)
+ map += ("Partition Parameters" -> paramsJson)
}
- map.put("Last Access", lastAccess)
- stats.foreach(s => map.put("Partition Statistics", s.simpleString))
+
+ map += ("Created Time" -> JString(new Date(createTime).toString))
+
+ val lastAccess = if (lastAccessTime <= 0) JString("UNKNOWN")
+ else JString(new Date(lastAccessTime).toString)
+ map += ("Last Access" -> lastAccess)
+
+ stats.foreach(s => map += ("Partition Statistics" ->
JString(s.simpleString)))
+
map
}
override def toString: String = {
- toLinkedHashMap.map { case ((key, value)) =>
+ toLinkedHashMap.map { case (key, value) =>
if (value.isEmpty) key else s"$key: $value"
}.mkString("CatalogPartition(\n\t", "\n\t", ")")
}
/** Readable string representation for the CatalogTablePartition. */
def simpleString: String = {
- toLinkedHashMap.map { case ((key, value)) =>
+ toLinkedHashMap.map { case (key, value) =>
if (value.isEmpty) key else s"$key: $value"
}.mkString("", "\n", "")
}
@@ -284,7 +341,7 @@ object ClusterBySpec {
case class BucketSpec(
numBuckets: Int,
bucketColumnNames: Seq[String],
- sortColumnNames: Seq[String]) extends SQLConfHelper {
+ sortColumnNames: Seq[String]) extends SQLConfHelper with
MetadataMapSupport {
if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) {
throw QueryCompilationErrors.invalidBucketNumberError(
@@ -301,11 +358,11 @@ case class BucketSpec(
s"$numBuckets buckets, $bucketString$sortString"
}
- def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
- mutable.LinkedHashMap[String, String](
- "Num Buckets" -> numBuckets.toString,
- "Bucket Columns" -> bucketColumnNames.map(quoteIdentifier).mkString("[",
", ", "]"),
- "Sort Columns" -> sortColumnNames.map(quoteIdentifier).mkString("[", ",
", "]")
+ def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
+ mutable.LinkedHashMap[String, JValue](
+ "Num Buckets" -> JInt(numBuckets),
+ "Bucket Columns" -> JArray(bucketColumnNames.map(JString).toList),
+ "Sort Columns" -> JArray(sortColumnNames.map(JString).toList)
)
}
}
@@ -355,7 +412,7 @@ case class CatalogTable(
tracksPartitionsInCatalog: Boolean = false,
schemaPreservesCase: Boolean = true,
ignoredProperties: Map[String, String] = Map.empty,
- viewOriginalText: Option[String] = None) {
+ viewOriginalText: Option[String] = None) extends MetadataMapSupport {
import CatalogTable._
@@ -524,67 +581,81 @@ case class CatalogTable(
locationUri, inputFormat, outputFormat, serde, compressed, properties))
}
+ def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
+ val filteredTableProperties = SQLConf.get
+ .redactOptions(properties.filter { case (k, v) =>
+ !k.startsWith(VIEW_PREFIX) && v.nonEmpty
+ })
- def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
- val map = new mutable.LinkedHashMap[String, String]()
- val tableProperties =
- SQLConf.get.redactOptions(properties.filter { case (k, _) =>
!k.startsWith(VIEW_PREFIX) })
- .toSeq.sortBy(_._1)
- .map(p => p._1 + "=" + p._2)
- val partitionColumns =
partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")
- val lastAccess = {
- if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
+ val tableProperties: JValue =
+ if (filteredTableProperties.isEmpty) JNull
+ else JObject(
+ filteredTableProperties.toSeq.sortBy(_._1).map { case (k, v) => k ->
JString(v) }: _*)
+
+ val partitionColumns: JValue =
+ if (partitionColumnNames.nonEmpty)
JArray(partitionColumnNames.map(JString).toList)
+ else JNull
+
+ val lastAccess: JValue =
+ if (lastAccessTime <= 0) JString("UNKNOWN") else JString(
+
DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(lastAccessTime)).toString)
+
+ val viewQueryOutputColumns: JValue =
+ if (viewQueryColumnNames.nonEmpty)
JArray(viewQueryColumnNames.map(JString).toList)
+ else JNull
+
+ val map = mutable.LinkedHashMap[String, JValue]()
+
+ if (identifier.catalog.isDefined) map += "Catalog" ->
JString(identifier.catalog.get)
+ if (identifier.database.isDefined) map += "Database" ->
JString(identifier.database.get)
+ map += "Table" -> JString(identifier.table)
+ if (Option(owner).exists(_.nonEmpty)) map += "Owner" -> JString(owner)
+ map += "Created Time" ->
+
JString(DateTimeUtils.microsToInstant(DateTimeUtils.millisToMicros(createTime)).toString)
+ if (lastAccess != JNull) map += "Last Access" -> lastAccess
+ map += "Created By" -> JString(s"Spark $createVersion")
+ map += "Type" -> JString(tableType.name)
+ if (provider.isDefined) map += "Provider" -> JString(provider.get)
+ bucketSpec.foreach { spec =>
+ map ++= spec.toJsonLinkedHashMap.map { case (k, v) => k -> v }
}
-
- identifier.catalog.foreach(map.put("Catalog", _))
- identifier.database.foreach(map.put("Database", _))
- map.put("Table", identifier.table)
- if (owner != null && owner.nonEmpty) map.put("Owner", owner)
- map.put("Created Time", new Date(createTime).toString)
- map.put("Last Access", lastAccess)
- map.put("Created By", "Spark " + createVersion)
- map.put("Type", tableType.name)
- provider.foreach(map.put("Provider", _))
- bucketSpec.foreach(map ++= _.toLinkedHashMap)
- comment.foreach(map.put("Comment", _))
- collation.foreach(map.put("Collation", _))
- if (tableType == CatalogTableType.VIEW) {
- viewText.foreach(map.put("View Text", _))
- viewOriginalText.foreach(map.put("View Original Text", _))
- if (SQLConf.get.viewSchemaBindingEnabled) {
- map.put("View Schema Mode", viewSchemaMode.toString)
- }
- if (viewCatalogAndNamespace.nonEmpty) {
- import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
- map.put("View Catalog and Namespace", viewCatalogAndNamespace.quoted)
- }
- if (viewQueryColumnNames.nonEmpty) {
- map.put("View Query Output Columns",
- viewQueryColumnNames.map(quoteIdentifier).mkString("[", ", ", "]"))
- }
+ if (comment.isDefined) map += "Comment" -> JString(comment.get)
+ if (collation.isDefined) map += "Collation" -> JString(collation.get)
+ if (tableType == CatalogTableType.VIEW && viewText.isDefined) {
+ map += "View Text" -> JString(viewText.get)
}
-
- if (tableProperties.nonEmpty) {
- map.put("Table Properties", tableProperties.mkString("[", ", ", "]"))
+ if (tableType == CatalogTableType.VIEW && viewOriginalText.isDefined) {
+ map += "View Original Text" -> JString(viewOriginalText.get)
}
- stats.foreach(s => map.put("Statistics", s.simpleString))
- map ++= storage.toLinkedHashMap
- if (tracksPartitionsInCatalog) map.put("Partition Provider", "Catalog")
- if (partitionColumnNames.nonEmpty) map.put("Partition Columns",
partitionColumns)
- if (schema.nonEmpty) map.put("Schema", schema.treeString)
-
- map
+ if (SQLConf.get.viewSchemaBindingEnabled && tableType ==
CatalogTableType.VIEW) {
+ map += "View Schema Mode" -> JString(viewSchemaMode.toString)
+ }
+ if (viewCatalogAndNamespace.nonEmpty && tableType ==
CatalogTableType.VIEW) {
+ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+ map += "View Catalog and Namespace" ->
JString(viewCatalogAndNamespace.quoted)
+ }
+ if (viewQueryOutputColumns != JNull) {
+ map += "View Query Output Columns" -> viewQueryOutputColumns
+ }
+ if (tableProperties != JNull) map += "Table Properties" -> tableProperties
+ if (stats.isDefined) map += "Statistics" -> JString(stats.get.simpleString)
+ map ++= storage.toJsonLinkedHashMap.map { case (k, v) => k -> v }
+ if (tracksPartitionsInCatalog) map += "Partition Provider" ->
JString("Catalog")
+ if (partitionColumns != JNull) map += "Partition Columns" ->
partitionColumns
+ if (schema.nonEmpty) map += "Schema" -> JString(schema.treeString)
+
+ map.filterNot(_._2 == JNull)
}
override def toString: String = {
- toLinkedHashMap.map { case ((key, value)) =>
+ toLinkedHashMap.map { case (key, value) =>
if (value.isEmpty) key else s"$key: $value"
}.mkString("CatalogTable(\n", "\n", ")")
}
/** Readable string representation for the CatalogTable. */
def simpleString: String = {
- toLinkedHashMap.map { case ((key, value)) =>
+ toLinkedHashMap.map { case (key, value) =>
if (value.isEmpty) key else s"$key: $value"
}.mkString("", "\n", "")
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 0a300cea03ff..1f9c14830364 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -5200,10 +5200,17 @@ class AstBuilder extends DataTypeAstBuilder
*/
override def visitDescribeRelation(ctx: DescribeRelationContext):
LogicalPlan = withOrigin(ctx) {
val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null
+ val asJson = ctx.JSON != null
+ if (asJson && !isExtended) {
+ val tableName =
ctx.identifierReference.getText.split("\\.").lastOption.getOrElse("table")
+ throw QueryCompilationErrors.describeJsonNotExtendedError(tableName)
+ }
val relation = createUnresolvedTableOrView(ctx.identifierReference,
"DESCRIBE TABLE")
if (ctx.describeColName != null) {
if (ctx.partitionSpec != null) {
throw QueryParsingErrors.descColumnForPartitionUnsupportedError(ctx)
+ } else if (asJson) {
+ throw QueryCompilationErrors.describeColJsonUnsupportedError()
} else {
DescribeColumn(
relation,
@@ -5221,7 +5228,11 @@ class AstBuilder extends DataTypeAstBuilder
} else {
Map.empty[String, String]
}
- DescribeRelation(relation, partitionSpec, isExtended)
+ if (asJson) {
+ DescribeRelationJson(relation, partitionSpec, isExtended)
+ } else {
+ DescribeRelation(relation, partitionSpec, isExtended)
+ }
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala
index 99d2ea775195..a6ec6f573630 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala
@@ -21,13 +21,19 @@ import
org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.types.{MetadataBuilder, StringType}
private[sql] object DescribeCommandSchema {
- def describeTableAttributes(): Seq[AttributeReference] = Seq(
- AttributeReference("col_name", StringType, nullable = false,
- new MetadataBuilder().putString("comment", "name of the
column").build())(),
- AttributeReference("data_type", StringType, nullable = false,
- new MetadataBuilder().putString("comment", "data type of the
column").build())(),
- AttributeReference("comment", StringType, nullable = true,
- new MetadataBuilder().putString("comment", "comment of the
column").build())())
+ def describeJsonTableAttributes(): Seq[AttributeReference] =
+ Seq(
+ AttributeReference("json_metadata", StringType, nullable = false,
+ new MetadataBuilder().putString("comment", "JSON metadata of the
table").build())()
+ )
+ def describeTableAttributes(): Seq[AttributeReference] = {
+ Seq(AttributeReference("col_name", StringType, nullable = false,
+ new MetadataBuilder().putString("comment", "name of the
column").build())(),
+ AttributeReference("data_type", StringType, nullable = false,
+ new MetadataBuilder().putString("comment", "data type of the
column").build())(),
+ AttributeReference("comment", StringType, nullable = true,
+ new MetadataBuilder().putString("comment", "comment of the
column").build())())
+ }
def describeColumnAttributes(): Seq[AttributeReference] = Seq(
AttributeReference("info_name", StringType, nullable = false,
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
index 58c62a90225a..b486a1fd0a72 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
@@ -691,6 +691,19 @@ object DescribeRelation {
def getOutputAttrs: Seq[Attribute] =
DescribeCommandSchema.describeTableAttributes()
}
+/**
+ * The logical plan of the DESCRIBE relation_name AS JSON command.
+ */
+case class DescribeRelationJson(
+ relation: LogicalPlan,
+ partitionSpec: TablePartitionSpec,
+ isExtended: Boolean) extends UnaryCommand {
+ override val output: Seq[Attribute] =
DescribeCommandSchema.describeJsonTableAttributes()
+ override def child: LogicalPlan = relation
+ override protected def withNewChildInternal(newChild: LogicalPlan):
DescribeRelationJson =
+ copy(relation = newChild)
+}
+
/**
* The logical plan of the DESCRIBE relation_name col_name command.
*/
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 65ae8da3c4da..ac419fd150ae 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1615,6 +1615,10 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
notSupportedForV2TablesError("ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]")
}
+ /** Builds the "not supported for v2 tables" error for the DESCRIBE TABLE AS JSON statement. */
+ def describeAsJsonNotSupportedForV2TablesError(): Throwable = {
+ notSupportedForV2TablesError("DESCRIBE TABLE AS JSON")
+ }
+
def loadDataNotSupportedForV2TablesError(): Throwable = {
notSupportedForV2TablesError("LOAD DATA")
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 6a388a7849f7..3b58518b98da 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -152,13 +152,17 @@ class ResolveSessionCatalog(val catalogManager:
CatalogManager)
case RenameTable(ResolvedV1TableOrViewIdentifier(oldIdent), newName,
isView) =>
AlterTableRenameCommand(oldIdent, newName.asTableIdentifier, isView)
+ case DescribeRelationJson(
+ ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) =>
+ DescribeTableJsonCommand(ident, partitionSpec, isExtended)
+
// Use v1 command to describe (temp) view, as v2 catalog doesn't support
view yet.
case DescribeRelation(
- ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended,
output) =>
+ ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended,
output) =>
DescribeTableCommand(ident, partitionSpec, isExtended, output)
case DescribeColumn(
- ResolvedViewIdentifier(ident), column: UnresolvedAttribute,
isExtended, output) =>
+ ResolvedViewIdentifier(ident), column: UnresolvedAttribute,
isExtended, output) =>
// For views, the column will not be resolved by `ResolveReferences`
because
// `ResolvedView` stores only the identifier.
DescribeColumnCommand(ident, column.nameParts, isExtended, output)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index a58e8fac6e36..e69e05ba7dec 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -19,12 +19,16 @@ package org.apache.spark.sql.execution.command
import java.net.{URI, URISyntaxException}
+import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters._
import scala.util.control.NonFatal
import org.apache.hadoop.fs.{FileContext, FsConstants, Path}
import org.apache.hadoop.fs.permission.{AclEntry, AclEntryScope, AclEntryType,
FsAction, FsPermission}
+import org.json4s._
+import org.json4s.JsonAST.JObject
+import org.json4s.jackson.JsonMethods._
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.{SQLConfHelper, TableIdentifier}
@@ -746,6 +750,261 @@ case class DescribeTableCommand(
}
}
+/**
+ * Command that looks like
+ * {{{
+ * DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec? [AS JSON];
+ * }}}
+ */
+case class DescribeTableJsonCommand(
+ table: TableIdentifier,
+ partitionSpec: TablePartitionSpec,
+ isExtended: Boolean) extends LeafRunnableCommand {
+ override val output = DescribeCommandSchema.describeJsonTableAttributes()
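+ // Normalized keys that `addKeyValueToMap` always drops: for catalog tables `run` emits the
+ // identifier parts itself as table_name, catalog_name, schema_name and namespace.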
+ private val excludedKeys = Set("catalog", "schema", "database", "table")
+
+  /**
+   * Collects the description of `table` into an insertion-ordered map and returns it as a
+   * single row holding the compact JSON rendering of that map.
+   */
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val sessionCatalog = sparkSession.sessionState.catalog
+    val fields = mutable.LinkedHashMap.empty[String, JValue]
+
+    if (sessionCatalog.isTempView(table)) {
+      // Temp views only expose their columns, and cannot be combined with a PARTITION clause.
+      if (partitionSpec.nonEmpty) {
+        throw QueryCompilationErrors.descPartitionNotAllowedOnTempView(table.identifier)
+      }
+      describeColsJson(
+        sessionCatalog.getTempViewOrPermanentTableMetadata(table).schema,
+        fields,
+        header = false)
+    } else {
+      val tableMeta = sessionCatalog.getTableRawMetadata(table)
+      // In older versions of Spark the stored table schema can be empty, in which case it is
+      // inferred at runtime by loading the table.
+      val tableSchema =
+        if (tableMeta.schema.nonEmpty) tableMeta.schema
+        else sparkSession.table(tableMeta.identifier).schema
+
+      addKeyValueToMap("table_name", JString(tableMeta.identifier.table), fields)
+      table.catalog.foreach { catalogName =>
+        addKeyValueToMap("catalog_name", JString(catalogName), fields)
+      }
+      table.database.foreach { db =>
+        addKeyValueToMap("namespace", JArray(List(JString(db))), fields)
+        addKeyValueToMap("schema_name", JString(db), fields)
+      }
+
+      describeColsJson(tableSchema, fields, header = false)
+      describeClusteringInfoJson(tableMeta, fields)
+
+      if (partitionSpec.isEmpty) {
+        describeFormattedTableInfoJson(tableMeta, fields)
+      } else {
+        // Partition-specific info for the DDL command:
+        // "DESCRIBE [EXTENDED|FORMATTED] table_name PARTITION (partitionVal*)"
+        describePartitionInfoJson(sparkSession, sessionCatalog, tableMeta, fields)
+      }
+    }
+
+    Seq(Row(compact(render(JObject(fields.toList)))))
+  }
+
+  /**
+   * Adds `key -> value` to `jsonMap` under a normalized key, unless that key is already
+   * present or is listed in `excludedKeys`.
+   *
+   * The key is lower-cased, spaces become underscores, and `inputformat`/`outputformat` are
+   * renamed to `input_format`/`output_format`. Existing entries are never overwritten, so the
+   * first value added for a key wins.
+   *
+   * @param key key to normalize before insertion
+   * @param value JSON value to store
+   * @param jsonMap insertion-ordered accumulator, mutated in place
+   */
+  private def addKeyValueToMap(
+      key: String,
+      value: JValue,
+      jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = {
+    // Rename some JSON keys that are pre-named in describe table implementation
+    val renames = Map(
+      "inputformat" -> "input_format",
+      "outputformat" -> "output_format"
+    )
+
+    // Locale.ROOT keeps the emitted key independent of the JVM default locale; with a Turkish
+    // default locale the no-arg toLowerCase maps "I" to a dotless i and corrupts the key.
+    val normalizedKey = key.toLowerCase(java.util.Locale.ROOT).replace(" ", "_")
+    val renamedKey = renames.getOrElse(normalizedKey, normalizedKey)
+
+    if (!jsonMap.contains(renamedKey) && !excludedKeys.contains(renamedKey)) {
+      jsonMap += renamedKey -> value
+    }
+  }
+
+  /**
+   * Recursively renders a data type as the JSON value used by DESCRIBE AS JSON.
+   * This is not the same as `json` in DataType.scala: some types carry additional fields here.
+   */
+  private def jsonType(
+      dataType: DataType): JValue = {
+    // Every type is rendered as an object whose first field is "name".
+    def typeObject(name: String, attributes: JField*): JValue = {
+      val nameField: JField = "name" -> JString(name)
+      JObject(nameField +: attributes: _*)
+    }
+
+    dataType match {
+      case ArrayType(elementType, containsNull) =>
+        typeObject(
+          "array",
+          "element_type" -> jsonType(elementType),
+          "element_nullable" -> JBool(containsNull))
+
+      case MapType(keyType, valueType, valueContainsNull) =>
+        typeObject(
+          "map",
+          "key_type" -> jsonType(keyType),
+          "value_type" -> jsonType(valueType),
+          "value_nullable" -> JBool(valueContainsNull))
+
+      case StructType(structFields) =>
+        val fieldsJson = structFields.map { f =>
+          val required: List[JField] = List(
+            "name" -> JString(f.name),
+            "type" -> jsonType(f.dataType),
+            "nullable" -> JBool(f.nullable))
+          // "comment" and "default" are only emitted when the field defines them.
+          val optional: List[JField] = List(
+            f.getComment().map(c => "comment" -> JString(c)),
+            f.getCurrentDefaultValue().map(d => "default" -> JString(d))).flatten
+
+          JObject(required ++ optional: _*)
+        }.toList
+        typeObject("struct", "fields" -> JArray(fieldsJson))
+
+      case decimal: DecimalType =>
+        typeObject(
+          "decimal",
+          "precision" -> JInt(decimal.precision),
+          "scale" -> JInt(decimal.scale))
+
+      case VarcharType(length) =>
+        typeObject("varchar", "length" -> JInt(length))
+
+      case CharType(length) =>
+        typeObject("char", "length" -> JInt(length))
+
+      // Only override TimestampType; TimestampType_NTZ type is already timestamp_ntz
+      case _: TimestampType =>
+        typeObject("timestamp_ltz")
+
+      case YearMonthIntervalType(startField, endField) =>
+        typeObject(
+          "interval",
+          "start_unit" -> JString(YearMonthIntervalType.fieldToString(startField)),
+          "end_unit" -> JString(YearMonthIntervalType.fieldToString(endField)))
+
+      case DayTimeIntervalType(startField, endField) =>
+        typeObject(
+          "interval",
+          "start_unit" -> JString(DayTimeIntervalType.fieldToString(startField)),
+          "end_unit" -> JString(DayTimeIntervalType.fieldToString(endField)))
+
+      case other =>
+        typeObject(other.typeName)
+    }
+  }
+
+  /**
+   * Adds the columns of `schema` to `jsonMap` under the "columns" key.
+   *
+   * The schema is rendered by `jsonType` as a struct object, and only its "fields" array (one
+   * object per column) is kept.
+   *
+   * @param schema schema whose fields are described
+   * @param jsonMap accumulator that receives the "columns" entry
+   * @param header not read by this method; kept so that existing call sites keep compiling
+   */
+  private def describeColsJson(
+      schema: StructType,
+      jsonMap: mutable.LinkedHashMap[String, JValue],
+      header: Boolean): Unit = {
+    // Select the array by field name instead of casting the result and searching it for the
+    // first JArray, which depended on the order of the fields emitted by `jsonType`.
+    val columnsJson = jsonType(schema) \ "fields"
+    addKeyValueToMap("columns", columnsJson, jsonMap)
+  }
+
+  /**
+   * Adds a "clustering_information" array to `jsonMap` when the table has a cluster-by spec.
+   * Each clustering column becomes an object with its name (parts quoted where needed), its
+   * JSON type and its comment, which is JSON null when the column has none.
+   */
+  private def describeClusteringInfoJson(
+      table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = {
+    table.clusterBySpec.foreach { spec =>
+      val columnsJson = spec.columnNames.map { column =>
+        val lookup = table.schema.findNestedField(column.fieldNames.toIndexedSeq)
+        assert(lookup.isDefined,
+          "The clustering column " +
+            s"${column.fieldNames.map(quoteIfNeeded).mkString(".")} " +
+            s"was not found in the table schema ${table.schema.catalogString}."
+        )
+        val (parentPath, leaf) = lookup.get
+        val qualifiedName = (parentPath :+ leaf.name).map(quoteIfNeeded).mkString(".")
+        val commentJson = leaf.getComment().fold[JValue](JNull)(JString(_))
+        JObject(
+          "name" -> JString(qualifiedName),
+          "type" -> jsonType(leaf.dataType),
+          "comment" -> commentJson
+        )
+      }.toList
+      addKeyValueToMap("clustering_information", JArray(columnsJson), jsonMap)
+    }
+  }
+
+  /**
+   * Adds table-level information to `jsonMap`: bucketing info (when the table has a bucket
+   * spec), then storage info, then the table metadata itself.
+   *
+   * `addKeyValueToMap` keeps the first value stored for a key, so the order of these three
+   * sources decides which one wins when key names collide.
+   */
+  private def describeFormattedTableInfoJson(
+      table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = {
+    table.bucketSpec.foreach { spec =>
+      spec.toJsonLinkedHashMap.foreach { case (key, value) =>
+        addKeyValueToMap(key, value, jsonMap)
+      }
+    }
+    table.storage.toJsonLinkedHashMap.foreach { case (key, value) =>
+      addKeyValueToMap(key, value, jsonMap)
+    }
+    // foreach rather than map: the entries are visited only for their effect on `jsonMap`.
+    table.toJsonLinkedHashMap.foreach { case (key, value) =>
+      addKeyValueToMap(key, value, jsonMap)
+    }
+  }
+
+  /**
+   * Adds the description of the partition selected by `partitionSpec` to `jsonMap`, followed
+   * by the table metadata, the bucketing info (when present) and the storage info.
+   *
+   * Throws the `descPartitionNotAllowedOnView` error when the relation is a view.
+   *
+   * @param spark session used for the partition-provider check and the name resolver
+   * @param catalog session catalog the partition is looked up in
+   * @param metadata catalog metadata of the described table
+   * @param jsonMap insertion-ordered accumulator, mutated in place
+   */
+  private def describePartitionInfoJson(
+      spark: SparkSession,
+      catalog: SessionCatalog,
+      metadata: CatalogTable,
+      jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = {
+    if (metadata.tableType == CatalogTableType.VIEW) {
+      throw QueryCompilationErrors.descPartitionNotAllowedOnView(table.identifier)
+    }
+
+    DDLUtils.verifyPartitionProviderIsHive(spark, metadata, "DESC PARTITION")
+    val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec(
+      partitionSpec,
+      metadata.partitionSchema,
+      table.quotedString,
+      spark.sessionState.conf.resolver)
+    val partition = catalog.getPartition(table, normalizedPartSpec)
+
+    // First add partition details to jsonMap.
+    // `addKeyValueToMap` only adds unique keys, so this ensures the
+    // more detailed partition information is added
+    // in the case of duplicated key names (e.g. storage_information).
+    partition.toJsonLinkedHashMap.foreach { case (key, value) =>
+      addKeyValueToMap(key, value, jsonMap)
+    }
+
+    // foreach rather than map below: the entries are visited only for their effect on `jsonMap`.
+    metadata.toJsonLinkedHashMap.foreach { case (key, value) =>
+      addKeyValueToMap(key, value, jsonMap)
+    }
+
+    metadata.bucketSpec.foreach { spec =>
+      spec.toJsonLinkedHashMap.foreach { case (key, value) =>
+        addKeyValueToMap(key, value, jsonMap)
+      }
+    }
+    metadata.storage.toJsonLinkedHashMap.foreach { case (key, value) =>
+      addKeyValueToMap(key, value, jsonMap)
+    }
+  }
+}
+
/**
* Command that looks like
* {{{
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index 499721fbae4e..f7a3be925475 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -344,6 +344,9 @@ class DataSourceV2Strategy(session: SparkSession) extends
Strategy with Predicat
case DescribeNamespace(ResolvedNamespace(catalog, ns, _), extended,
output) =>
DescribeNamespaceExec(output, catalog.asNamespaceCatalog, ns, extended)
:: Nil
+ case DescribeRelationJson(_, _, _) =>
+ throw QueryCompilationErrors.describeAsJsonNotSupportedForV2TablesError()
+
case DescribeRelation(r: ResolvedTable, partitionSpec, isExtended, output)
=>
if (partitionSpec.nonEmpty) {
throw
QueryCompilationErrors.describeDoesNotSupportPartitionForV2TablesError()
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index ff0935bfd03e..f52f69a5ff80 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -56,6 +56,35 @@ DESCRIBE t
DescribeTableCommand `spark_catalog`.`default`.`t`, false, [col_name#x,
data_type#x, comment#x]
+-- !query
+DESCRIBE EXTENDED t AS JSON
+-- !query analysis
+DescribeTableJsonCommand `spark_catalog`.`default`.`t`, true
+
+
+-- !query
+DESCRIBE t AS JSON
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "DESCRIBE_JSON_NOT_EXTENDED",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "tableName" : "t"
+ }
+}
+
+
+-- !query
+DESC FORMATTED t a AS JSON
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON",
+ "sqlState" : "0A000"
+}
+
+
-- !query
DESC default.t
-- !query analysis
@@ -110,6 +139,12 @@ DESC t PARTITION (c='Us', d=1)
DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=1], false,
[col_name#x, data_type#x, comment#x]
+-- !query
+DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON
+-- !query analysis
+DescribeTableJsonCommand `spark_catalog`.`default`.`t`, [c=Us, d=1], true
+
+
-- !query
DESC EXTENDED t PARTITION (c='Us', d=1)
-- !query analysis
@@ -290,6 +325,12 @@ EXPLAIN DESCRIBE t PARTITION (c='Us', d=2)
ExplainCommand 'DescribeRelation [c=Us, d=2], false, [col_name#x, data_type#x,
comment#x], SimpleMode
+-- !query
+EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON
+-- !query analysis
+ExplainCommand 'DescribeRelationJson [c=Us, d=2], true, SimpleMode
+
+
-- !query
DROP TABLE t
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index b37931456d00..aa6f38defdec 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -21,6 +21,14 @@ ALTER TABLE t ADD PARTITION (c='Us', d=1);
DESCRIBE t;
+DESCRIBE EXTENDED t AS JSON;
+
+-- AnalysisException: describe table as json must be extended
+DESCRIBE t AS JSON;
+
+-- AnalysisException: describe col as json unsupported
+DESC FORMATTED t a AS JSON;
+
DESC default.t;
DESC TABLE t;
@@ -39,6 +47,8 @@ DESC EXTENDED t;
DESC t PARTITION (c='Us', d=1);
+DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON;
+
DESC EXTENDED t PARTITION (c='Us', d=1);
DESC FORMATTED t PARTITION (c='Us', d=1);
@@ -88,6 +98,7 @@ EXPLAIN DESC EXTENDED t;
EXPLAIN EXTENDED DESC t;
EXPLAIN DESCRIBE t b;
EXPLAIN DESCRIBE t PARTITION (c='Us', d=2);
+EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON;
-- DROP TEST TABLES/VIEWS
DROP TABLE t;
@@ -119,3 +130,4 @@ DESC EXTENDED e;
DESC TABLE EXTENDED e;
DESC FORMATTED e;
+
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 0f51816c145e..015b0ceff335 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -71,6 +71,41 @@ c string
d string
+-- !query
+DESCRIBE EXTENDED t AS JSON
+-- !query schema
+struct<json_metadata:string>
+-- !query output
+{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not
included in
comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","pa [...]
+
+
+-- !query
+DESCRIBE t AS JSON
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "DESCRIBE_JSON_NOT_EXTENDED",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "tableName" : "t"
+ }
+}
+
+
+-- !query
+DESC FORMATTED t a AS JSON
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON",
+ "sqlState" : "0A000"
+}
+
+
-- !query
DESC default.t
-- !query schema
@@ -263,6 +298,14 @@ c string
d string
+-- !query
+DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON
+-- !query schema
+struct<json_metadata:string>
+-- !query output
+{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"integer"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not
included in
comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"****
[...]
+
+
-- !query
DESC EXTENDED t PARTITION (c='Us', d=1)
-- !query schema
@@ -644,6 +687,16 @@ Execute DescribeTableCommand
+- DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=2], false,
[col_name#x, data_type#x, comment#x]
+-- !query
+EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON
+-- !query schema
+struct<plan:string>
+-- !query output
+== Physical Plan ==
+Execute DescribeTableJsonCommand
+ +- DescribeTableJsonCommand `spark_catalog`.`default`.`t`, [c=Us, d=2], true
+
+
-- !query
DROP TABLE t
-- !query schema
diff --git
a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
index f9c9df3f9bf5..521b0afe1926 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out
@@ -170,6 +170,7 @@ IS true
ITEMS false
ITERATE false
JOIN true
+JSON false
KEYS false
LANGUAGE false
LAST false
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
index 67e5e4170d78..4d702588ad2b 100644
--- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out
@@ -170,6 +170,7 @@ IS false
ITEMS false
ITERATE false
JOIN false
+JSON false
KEYS false
LANGUAGE false
LAST false
diff --git
a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
index 67e5e4170d78..4d702588ad2b 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out
@@ -170,6 +170,7 @@ IS false
ITEMS false
ITERATE false
JOIN false
+JSON false
KEYS false
LANGUAGE false
LAST false
diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
index cc32e2eff255..a4b967ca61f0 100644
--- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -128,7 +128,6 @@ View Schema Mode: BINDING
Schema: root
|-- e: integer (nullable = true)
-
showdb show_t1 false Catalog: spark_catalog
Database: showdb
Table: show_t1
@@ -146,7 +145,6 @@ Schema: root
|-- c: string (nullable = true)
|-- d: string (nullable = true)
-
showdb show_t2 false Catalog: spark_catalog
Database: showdb
Table: show_t2
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index 7daf2c6b1b58..04f274e4af59 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -60,7 +60,18 @@ trait SQLQueryTestHelper extends Logging {
.replaceAll("CTERelationDef \\d+,", s"CTERelationDef xxxx,")
.replaceAll("CTERelationRef \\d+,", s"CTERelationRef xxxx,")
.replaceAll("@\\w*,", s"@xxxxxxxx,")
- .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen
codegenStageIds
+ .replaceAll("\\*\\(\\d+\\) ", "*")
+ .replaceAll(
+ s""""location":.*?$clsName/""",
+ s""""location": "$notIncludedMsg/{warehouse_dir}/""")
+ .replaceAll(s""""created_by":".*?"""", s""""created_by
$notIncludedMsg":"None"""")
+ .replaceAll(s""""created_time":".*?"""", s""""created_time
$notIncludedMsg":"None"""")
+ .replaceAll(s""""last_access":".*?"""", s""""last_access
$notIncludedMsg":"None"""")
+ .replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""")
+ .replaceAll(s""""partition_statistics":"\\d+"""",
+ s""""partition_statistics $notIncludedMsg":"None"""")
+ .replaceAll("cterelationdef \\d+,", "cterelationdef xxxx,")
+ .replaceAll("cterelationref \\d+,", "cterelationref xxxx,")
}
/**
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
index 944f20bf8e92..d81f007e2a4d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution.command
+import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{AnalysisTest,
UnresolvedAttribute, UnresolvedTableOrView}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
import org.apache.spark.sql.catalyst.plans.logical.{DescribeColumn,
DescribeRelation}
@@ -75,6 +76,12 @@ class DescribeTableParserSuite extends AnalysisTest {
UnresolvedAttribute(Seq("col")),
isExtended = true))
+ val error = intercept[AnalysisException](parsePlan("DESCRIBE EXTENDED t
col AS JSON"))
+
+ checkError(
+ exception = error,
+ condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON")
+
val sql = "DESCRIBE TABLE t PARTITION (ds='1970-01-01') col"
checkError(
exception = parseException(parsePlan)(sql),
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
index 92467cbcb6c0..541fec1cb374 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec,
CatalogStorageFormat,
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast,
EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, Literal,
StringLiteral}
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
-import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn,
AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect,
DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction,
InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable,
OneRowRelation, OverwriteByExpression, OverwritePartitionsDynamic, Project,
SetTableLocation, SetTableProperties, ShowTableProperties, SubqueryAlias,
UnsetTableProperties, UpdateAction, UpdateTable}
+import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn,
AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect,
DeleteAction, DeleteFromTable, DescribeRelation, DescribeRelationJson,
DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan,
MergeIntoTable, OneRowRelation, OverwriteByExpression,
OverwritePartitionsDynamic, Project, SetTableLocation, SetTableProperties,
ShowTableProperties, SubqueryAlias, UnsetTableProperties, UpdateAction, U [...]
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId
import org.apache.spark.sql.connector.FakeV2Provider
@@ -961,6 +961,43 @@ class PlanResolutionSuite extends AnalysisTest {
assert(parsed4.isInstanceOf[DescribeTableCommand])
}
+  // DESC ... AS JSON resolves to the v1 DescribeTableJsonCommand for session-catalog v1 tables
+  // and stays a DescribeRelationJson over a ResolvedTable for v2 tables.
+  test("DESCRIBE AS JSON relation") {
+    Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach {
+      case (tblName, useV1Command) =>
+        // The same checks run without and with a PARTITION clause.
+        Seq(
+          "" -> Map.empty[String, String],
+          " PARTITION(a=1)" -> Map("a" -> "1")
+        ).foreach { case (partitionClause, expectedSpec) =>
+          val sql = s"DESC TABLE EXTENDED $tblName$partitionClause AS JSON"
+          val parsed = parseAndResolve(sql)
+          if (useV1Command) {
+            val expected = DescribeTableJsonCommand(
+              TableIdentifier(tblName, Some("default"), Some(SESSION_CATALOG_NAME)),
+              expectedSpec,
+              isExtended = true)
+            comparePlans(parsed, expected)
+          } else {
+            parsed match {
+              case DescribeRelationJson(_: ResolvedTable, partitionSpec, isExtended) =>
+                assert(isExtended)
+                assert(partitionSpec == expectedSpec)
+              case _ =>
+                fail("Expect DescribeRelationJson, but got:\n" + parsed.treeString)
+            }
+          }
+        }
+    }
+  }
+
test("DELETE FROM") {
Seq("v2Table", "testcat.tab").foreach { tblName =>
val sql1 = s"DELETE FROM $tblName"
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
index 164ac2bff9f6..4413087e886e 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala
@@ -19,6 +19,9 @@ package org.apache.spark.sql.execution.command.v1
import java.util.Locale
+import org.json4s._
+import org.json4s.jackson.JsonMethods.parse
+
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import
org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.execution.command
@@ -36,6 +39,7 @@ import org.apache.spark.sql.types.StringType
*/
trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase
with command.TestsV1AndV2Commands {
+ implicit val formats: org.json4s.DefaultFormats.type =
org.json4s.DefaultFormats
def getProvider(): String =
defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT)
@@ -276,4 +280,558 @@ class DescribeTableSuite extends DescribeTableSuiteBase
with CommandSuiteBase {
))
}
}
+
+  test("DESCRIBE AS JSON throws when not EXTENDED") {
+    withNamespaceAndTable("ns", "table") { t =>
+      // Fixture: a partitioned, bucketed table with options, a comment and properties.
+      val createTableSql =
+        s"""
+           |CREATE TABLE $t (
+           | employee_id INT,
+           | employee_name STRING,
+           | department STRING,
+           | hire_date DATE
+           |) USING parquet
+           |OPTIONS ('compression' = 'snappy', 'max_records' = '1000')
+           |PARTITIONED BY (department, hire_date)
+           |CLUSTERED BY (employee_id) SORTED BY (employee_name ASC) INTO 4 BUCKETS
+           |COMMENT 'Employee data table for testing partitions and buckets'
+           |TBLPROPERTIES ('version' = '1.0')
+           |""".stripMargin
+      spark.sql(createTableSql)
+
+      // AS JSON without EXTENDED/FORMATTED is expected to fail analysis.
+      checkError(
+        exception = intercept[AnalysisException] {
+          spark.sql(s"DESCRIBE $t AS JSON")
+        },
+        condition = "DESCRIBE_JSON_NOT_EXTENDED",
+        parameters = Map("tableName" -> "table"))
+    }
+  }
+
+  test("DESCRIBE AS JSON partitions, clusters, buckets") {
+    withNamespaceAndTable("ns", "table") { t =>
+      // Fixture: partitioned and bucketed table with options, a comment and properties.
+      val tableCreationStr =
+        s"""
+           |CREATE TABLE $t (
+           | employee_id INT,
+           | employee_name STRING,
+           | department STRING,
+           | hire_date DATE
+           |) USING parquet
+           |OPTIONS ('compression' = 'snappy', 'max_records' = '1000')
+           |PARTITIONED BY (department, hire_date)
+           |CLUSTERED BY (employee_id) SORTED BY (employee_name ASC) INTO 4 BUCKETS
+           |COMMENT 'Employee data table for testing partitions and buckets'
+           |TBLPROPERTIES ('version' = '1.0')
+           |""".stripMargin
+      spark.sql(tableCreationStr)
+      val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON")
+      val firstRow = descriptionDf.select("json_metadata").head()
+      val jsonValue = firstRow.getString(0)
+      val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
+
+      val expectedOutput = DescribeTableJson(
+        table_name = Some("table"),
+        catalog_name = Some(SESSION_CATALOG_NAME),
+        namespace = Some(List("ns")),
+        schema_name = Some("ns"),
+        columns = Some(List(
+          TableColumn("employee_id", Type("integer"), true),
+          TableColumn("employee_name", Type("string"), true),
+          TableColumn("department", Type("string"), true),
+          TableColumn("hire_date", Type("date"), true)
+        )),
+        owner = Some(""),
+        created_time = Some(""),
+        last_access = Some("UNKNOWN"),
+        // Take the version from the running build so the test survives version bumps.
+        created_by = Some(s"Spark ${org.apache.spark.SPARK_VERSION}"),
+        `type` = Some("MANAGED"),
+        provider = Some("parquet"),
+        bucket_columns = Some(List("employee_id")),
+        sort_columns = Some(List("employee_name")),
+        comment = Some("Employee data table for testing partitions and buckets"),
+        table_properties = Some(Map(
+          "version" -> "1.0"
+        )),
+        location = Some(""),
+        serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"),
+        inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+        outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+        storage_properties = Some(Map(
+          "compression" -> "snappy",
+          "max_records" -> "1000"
+        )),
+        partition_provider = Some("Catalog"),
+        partition_columns = Some(List("department", "hire_date"))
+      )
+
+      // owner, created_time and location are blanked on the parsed side to match the
+      // empty placeholders used in the expectation above.
+      val normalizedOutput =
+        parsedOutput.copy(owner = Some(""), created_time = Some(""), location = Some(""))
+
+      if (getProvider() == "hive") {
+        assert(expectedOutput == normalizedOutput)
+      } else {
+        // The Hive serde/format entries are only expected when the provider is hive.
+        val expectedWithoutSerde =
+          expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None)
+        assert(expectedWithoutSerde == normalizedOutput)
+      }
+    }
+  }
+
+  test("DESCRIBE AS JSON partition spec") {
+    withNamespaceAndTable("ns", "table") { t =>
+      val tableCreationStr =
+        s"""
+           |CREATE TABLE $t (
+           | id INT,
+           | name STRING,
+           | region STRING,
+           | category STRING
+           |) USING parquet
+           |PARTITIONED BY (region, category)
+           |COMMENT 'test partition spec'
+           |TBLPROPERTIES ('t' = 'test')
+           |""".stripMargin
+      spark.sql(tableCreationStr)
+      // The partition must exist before it can be described.
+      spark.sql(s"ALTER TABLE $t ADD PARTITION (region='USA', category='tech')")
+
+      val descriptionDf =
+        spark.sql(s"DESCRIBE FORMATTED $t PARTITION (region='USA', category='tech') AS JSON")
+      val firstRow = descriptionDf.select("json_metadata").head()
+      val jsonValue = firstRow.getString(0)
+      val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
+
+      val expectedOutput = DescribeTableJson(
+        table_name = Some("table"),
+        catalog_name = Some("spark_catalog"),
+        namespace = Some(List("ns")),
+        schema_name = Some("ns"),
+        columns = Some(List(
+          TableColumn("id", Type("integer"), true),
+          TableColumn("name", Type("string"), true),
+          TableColumn("region", Type("string"), true),
+          TableColumn("category", Type("string"), true)
+        )),
+        last_access = Some("UNKNOWN"),
+        // Take the version from the running build so the test survives version bumps.
+        created_by = Some(s"Spark ${org.apache.spark.SPARK_VERSION}"),
+        `type` = Some("MANAGED"),
+        provider = Some("parquet"),
+        bucket_columns = Some(Nil),
+        sort_columns = Some(Nil),
+        comment = Some("test partition spec"),
+        table_properties = Some(Map(
+          "t" -> "test"
+        )),
+        serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"),
+        inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+        outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+        storage_properties = Some(Map(
+          "serialization.format" -> "1"
+        )),
+        partition_provider = Some("Catalog"),
+        partition_columns = Some(List("region", "category")),
+        partition_values = Some(Map("region" -> "USA", "category" -> "tech"))
+      )
+
+      // Drop the "path" storage property before comparing.
+      val filteredParsedStorageProperties =
+        parsedOutput.storage_properties.map(_.filterNot { case (key, _) => key == "path" })
+
+      // location, created_time and owner are cleared on the parsed side; the expectation
+      // above leaves them unset.
+      val normalizedOutput = parsedOutput.copy(
+        location = None,
+        created_time = None,
+        owner = None,
+        storage_properties = filteredParsedStorageProperties)
+
+      if (getProvider() == "hive") {
+        assert(expectedOutput == normalizedOutput)
+      } else {
+        // Hive serde/format entries and storage properties are only expected for hive.
+        val expectedWithoutSerde = expectedOutput.copy(
+          inputformat = None,
+          outputformat = None,
+          serde_library = None,
+          storage_properties = None)
+        assert(expectedWithoutSerde == normalizedOutput)
+      }
+    }
+  }
+
+  test("DESCRIBE AS JSON default values") {
+    withNamespaceAndTable("ns", "table") { t =>
+      val tableCreationStr =
+        s"""
+           |CREATE TABLE $t (
+           | id INT DEFAULT 1,
+           | name STRING DEFAULT 'unknown',
+           | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+           | is_active BOOLEAN DEFAULT true
+           |)
+           |USING parquet COMMENT 'table_comment'
+           |""".stripMargin
+      spark.sql(tableCreationStr)
+
+      val descriptionDf = spark.sql(s"DESC EXTENDED $t AS JSON")
+      val firstRow = descriptionDf.select("json_metadata").head()
+      val jsonValue = firstRow.getString(0)
+      val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
+
+      val expectedOutput = DescribeTableJson(
+        table_name = Some("table"),
+        catalog_name = Some("spark_catalog"),
+        namespace = Some(List("ns")),
+        schema_name = Some("ns"),
+        // Each column is expected to report its DEFAULT expression as written in the DDL.
+        columns = Some(List(
+          TableColumn("id", Type("integer"), default = Some("1")),
+          TableColumn("name", Type("string"), default = Some("'unknown'")),
+          TableColumn("created_at", Type("timestamp_ltz"), default = Some("CURRENT_TIMESTAMP")),
+          TableColumn("is_active", Type("boolean"), default = Some("true"))
+        )),
+        last_access = Some("UNKNOWN"),
+        // Take the version from the running build so the test survives version bumps.
+        created_by = Some(s"Spark ${org.apache.spark.SPARK_VERSION}"),
+        `type` = Some("MANAGED"),
+        storage_properties = None,
+        provider = Some("parquet"),
+        bucket_columns = Some(Nil),
+        sort_columns = Some(Nil),
+        comment = Some("table_comment"),
+        serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"),
+        inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+        outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+        table_properties = None
+      )
+
+      // location, created_time and owner are cleared on the parsed side; the expectation
+      // above leaves them unset.
+      val normalizedOutput =
+        parsedOutput.copy(location = None, created_time = None, owner = None)
+
+      if (getProvider() == "hive") {
+        assert(expectedOutput == normalizedOutput)
+      } else {
+        // The Hive serde/format entries are only expected when the provider is hive.
+        val expectedWithoutSerde =
+          expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None)
+        assert(expectedWithoutSerde == normalizedOutput)
+      }
+    }
+  }
+
+  test("DESCRIBE AS JSON temp view") {
+    withNamespaceAndTable("ns", "table") { t =>
+      withTempView("temp_view") {
+        // Base table that the temporary view selects from.
+        val createTableSql =
+          s"""
+             |CREATE TABLE $t (id INT, name STRING, created_at TIMESTAMP)
+             | USING parquet
+             | OPTIONS ('compression' 'snappy')
+             | CLUSTERED BY (id, name) SORTED BY (created_at) INTO 4 BUCKETS
+             | COMMENT 'test temp view'
+             | TBLPROPERTIES ('parquet.encryption' = 'true')
+             |""".stripMargin
+        spark.sql(createTableSql)
+        spark.sql(s"CREATE TEMPORARY VIEW temp_view AS SELECT * FROM $t")
+
+        val json = spark
+          .sql(s"DESCRIBE EXTENDED temp_view AS JSON")
+          .select("json_metadata")
+          .head()
+          .getString(0)
+        val actual = parse(json).extract[DescribeTableJson]
+
+        // Only the columns are set explicitly; every other field keeps its declared default.
+        val viewColumns = List(
+          TableColumn("id", Type("integer")),
+          TableColumn("name", Type("string")),
+          TableColumn("created_at", Type("timestamp_ltz")))
+        val expected = DescribeTableJson(columns = Some(viewColumns))
+
+        assert(expected == actual)
+      }
+    }
+  }
+
+  test("DESCRIBE AS JSON persistent view") {
+    withNamespaceAndTable("ns", "table") { t =>
+      withView("view") {
+        // Base table that the persistent view selects from.
+        val tableCreationStr =
+          s"""
+             |CREATE TABLE $t (id INT, name STRING, created_at TIMESTAMP)
+             | USING parquet
+             | OPTIONS ('compression' 'snappy')
+             | CLUSTERED BY (id, name) SORTED BY (created_at) INTO 4 BUCKETS
+             | COMMENT 'test temp view'
+             | TBLPROPERTIES ('parquet.encryption' = 'true')
+             |""".stripMargin
+        spark.sql(tableCreationStr)
+        spark.sql(s"CREATE VIEW view AS SELECT * FROM $t")
+        val descriptionDf = spark.sql(s"DESCRIBE EXTENDED view AS JSON")
+        val firstRow = descriptionDf.select("json_metadata").head()
+        val jsonValue = firstRow.getString(0)
+        val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
+
+        val expectedOutput = DescribeTableJson(
+          table_name = Some("view"),
+          catalog_name = Some("spark_catalog"),
+          namespace = Some(List("default")),
+          schema_name = Some("default"),
+          columns = Some(List(
+            TableColumn("id", Type("integer")),
+            TableColumn("name", Type("string")),
+            TableColumn("created_at", Type("timestamp_ltz"))
+          )),
+          serde_library = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
+          inputformat = Some("org.apache.hadoop.mapred.SequenceFileInputFormat"),
+          outputformat = Some("org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"),
+          storage_properties = Some(Map("serialization.format" -> "1")),
+          last_access = Some("UNKNOWN"),
+          // Take the version from the running build so the test survives version bumps.
+          created_by = Some(s"Spark ${org.apache.spark.SPARK_VERSION}"),
+          `type` = Some("VIEW"),
+          view_text = Some("SELECT * FROM spark_catalog.ns.table"),
+          view_original_text = Some("SELECT * FROM spark_catalog.ns.table"),
+          view_schema_mode = Some("COMPENSATION"),
+          view_catalog_and_namespace = Some("spark_catalog.default"),
+          view_query_output_columns = Some(List("id", "name", "created_at"))
+        )
+
+        // table_properties, created_time and owner are cleared on the parsed side; the
+        // expectation above leaves them unset.
+        val normalizedOutput =
+          parsedOutput.copy(table_properties = None, created_time = None, owner = None)
+
+        if (getProvider() == "hive") {
+          assert(expectedOutput == normalizedOutput)
+        } else {
+          // Hive serde/format entries and storage properties are only expected for hive.
+          val expectedWithoutSerde = expectedOutput.copy(
+            inputformat = None,
+            outputformat = None,
+            serde_library = None,
+            storage_properties = None)
+          assert(expectedWithoutSerde == normalizedOutput)
+        }
+      }
+    }
+  }
+
+  test("DESCRIBE AS JSON for column throws Analysis Exception") {
+    withNamespaceAndTable("ns", "table") { t =>
+      // Use the fixture-provided identifier `t` throughout, as the sibling tests do,
+      // instead of a hard-coded `ns.table`.
+      val tableCreationStr =
+        s"""
+           |CREATE TABLE $t(
+           | cust_id INT,
+           | state VARCHAR(20),
+           | name STRING COMMENT "Short name"
+           | )
+           | USING parquet
+           | PARTITIONED BY (state)
+           |""".stripMargin
+      spark.sql(tableCreationStr)
+      spark.sql(s"""INSERT INTO $t PARTITION (state = "CA") VALUES (100, "Jane")""")
+
+      // Describing a single column together with AS JSON is expected to be rejected.
+      val error = intercept[AnalysisException] {
+        spark.sql(s"DESCRIBE FORMATTED $t $t.name AS JSON")
+      }
+
+      checkError(
+        exception = error,
+        condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON")
+    }
+  }
+
+  test("DESCRIBE AS JSON complex types") {
+    withNamespaceAndTable("ns", "table") { t =>
+      val tableCreationStr =
+        s"""
+           |CREATE TABLE $t (
+           | id STRING,
+           | logs VARIANT,
+           | nested_struct STRUCT<
+           | name: STRING,
+           | age: INT,
+           | contact: STRUCT<
+           | email: STRING,
+           | phone_numbers: ARRAY<STRING>,
+           | addresses: ARRAY<STRUCT<
+           | street: STRING,
+           | city: STRING,
+           | zip: INT
+           | >>
+           | >
+           | >,
+           | preferences MAP<STRING, ARRAY<STRING>>
+           |) USING parquet
+           | OPTIONS (option1 'value1', option2 'value2')
+           | PARTITIONED BY (id)
+           | COMMENT 'A table with nested complex types'
+           | TBLPROPERTIES ('property1' = 'value1', 'password' = 'password')
+        """.stripMargin
+      spark.sql(tableCreationStr)
+      val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON")
+      val firstRow = descriptionDf.select("json_metadata").head()
+      val jsonValue = firstRow.getString(0)
+      val parsedOutput = parse(jsonValue).extract[DescribeTableJson]
+
+      // The expected type tree is built bottom-up from named pieces so that each
+      // nesting level of the DDL above can be read on its own.
+      val stringArrayType =
+        Type("array", element_type = Some(Type("string")), element_nullable = Some(true))
+      val addressType = Type("struct", fields = Some(List(
+        Field("street", Type("string")),
+        Field("city", Type("string")),
+        Field("zip", Type("integer")))))
+      val addressArrayType =
+        Type("array", element_type = Some(addressType), element_nullable = Some(true))
+      val contactType = Type("struct", fields = Some(List(
+        Field("email", Type("string")),
+        Field("phone_numbers", stringArrayType),
+        Field("addresses", addressArrayType))))
+      val nestedStructType = Type("struct", fields = Some(List(
+        Field("name", Type("string")),
+        Field("age", Type("integer")),
+        Field("contact", contactType))))
+      val preferencesType = Type(
+        "map",
+        key_type = Some(Type("string")),
+        value_type = Some(stringArrayType),
+        value_nullable = Some(true))
+
+      val expectedOutput = DescribeTableJson(
+        table_name = Some("table"),
+        catalog_name = Some("spark_catalog"),
+        namespace = Some(List("ns")),
+        schema_name = Some("ns"),
+        // The partition column `id` is expected last, after the data columns.
+        columns = Some(List(
+          TableColumn("logs", Type("variant")),
+          TableColumn("nested_struct", nestedStructType),
+          TableColumn("preferences", preferencesType),
+          TableColumn("id", Type("string"))
+        )),
+        serde_library = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"),
+        inputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
+        outputformat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
+        storage_properties = Some(Map(
+          "option1" -> "value1",
+          "option2" -> "value2"
+        )),
+        last_access = Some("UNKNOWN"),
+        // Take the version from the running build so the test survives version bumps.
+        created_by = Some(s"Spark ${org.apache.spark.SPARK_VERSION}"),
+        `type` = Some("MANAGED"),
+        provider = Some("parquet"),
+        comment = Some("A table with nested complex types"),
+        // The 'password' property is expected to come back redacted.
+        table_properties = Some(Map(
+          "password" -> "*********(redacted)",
+          "property1" -> "value1"
+        )),
+        partition_provider = Some("Catalog"),
+        partition_columns = Some(List("id"))
+      )
+
+      // location, created_time and owner are cleared on the parsed side; the expectation
+      // above leaves them unset.
+      val normalizedOutput =
+        parsedOutput.copy(location = None, created_time = None, owner = None)
+
+      if (getProvider() == "hive") {
+        assert(expectedOutput == normalizedOutput)
+      } else {
+        // The Hive serde/format entries are only expected when the provider is hive.
+        val expectedWithoutSerde =
+          expectedOutput.copy(inputformat = None, outputformat = None, serde_library = None)
+        assert(expectedWithoutSerde == normalizedOutput)
+      }
+    }
+  }
}
+
+/**
+ * Represents JSON output of DESCRIBE TABLE AS JSON.
+ *
+ * Test-side model that the `json_metadata` string is extracted into. Field names are
+ * snake_case so they line up with the JSON keys. Every field is an `Option` with a
+ * default, so an expected value only has to name the fields a test cares about. Note that
+ * the list-valued fields `namespace`, `columns`, `bucket_columns`, `sort_columns` and
+ * `partition_columns` default to `Some(Nil)` rather than `None`.
+ */
+case class DescribeTableJson(
+ table_name: Option[String] = None,
+ catalog_name: Option[String] = None,
+ namespace: Option[List[String]] = Some(Nil),
+ schema_name: Option[String] = None,
+ columns: Option[List[TableColumn]] = Some(Nil),
+ owner: Option[String] = None,
+ created_time: Option[String] = None,
+ last_access: Option[String] = None,
+ created_by: Option[String] = None,
+ `type`: Option[String] = None,
+ provider: Option[String] = None,
+ bucket_columns: Option[List[String]] = Some(Nil),
+ sort_columns: Option[List[String]] = Some(Nil),
+ comment: Option[String] = None,
+ table_properties: Option[Map[String, String]] = None,
+ location: Option[String] = None,
+ serde_library: Option[String] = None,
+ inputformat: Option[String] = None,
+ outputformat: Option[String] = None,
+ storage_properties: Option[Map[String, String]] = None,
+ partition_provider: Option[String] = None,
+ partition_columns: Option[List[String]] = Some(Nil),
+ partition_values: Option[Map[String, String]] = None,
+ view_text: Option[String] = None,
+ view_original_text: Option[String] = None,
+ view_schema_mode: Option[String] = None,
+ view_catalog_and_namespace: Option[String] = None,
+ view_query_output_columns: Option[List[String]] = None
+ )
+
+/**
+ * Used for columns field of DescribeTableJson.
+ *
+ * One entry per table column: its name, its (possibly nested) type, and optional comment
+ * and default expression.
+ *
+ * NOTE(review): the JSON schema in the commit description names the column-level key
+ * `nullable`, while this field is called `element_nullable` and defaults to `true`.
+ * Confirm whether column nullability is actually being checked by the tests.
+ */
+case class TableColumn(
+ name: String,
+ `type`: Type,
+ element_nullable: Boolean = true,
+ comment: Option[String] = None,
+ default: Option[String] = None
+)
+
+/**
+ * A column or field type as it appears in the DESCRIBE ... AS JSON output.
+ *
+ * `name` is the type name (the tests use values such as "string", "struct", "array" and
+ * "map"). The tests populate `fields` for structs, `element_type` / `element_nullable` for
+ * arrays, and `key_type` / `value_type` / `value_nullable` for maps. All three nullability
+ * flags default to `Some(true)`.
+ */
+case class Type(
+ name: String,
+ fields: Option[List[Field]] = None,
+ `type`: Option[Type] = None,
+ element_type: Option[Type] = None,
+ key_type: Option[Type] = None,
+ value_type: Option[Type] = None,
+ comment: Option[String] = None,
+ default: Option[String] = None,
+ element_nullable: Option[Boolean] = Some(true),
+ value_nullable: Option[Boolean] = Some(true),
+ nullable: Option[Boolean] = Some(true)
+)
+
+/**
+ * A single member of a struct [[Type]]: its name and type, plus optional comment and
+ * default. It has the same shape as [[TableColumn]].
+ *
+ * NOTE(review): as with TableColumn, the nullability field is named `element_nullable`.
+ * Confirm this matches the key emitted for struct fields.
+ */
+case class Field(
+ name: String,
+ `type`: Type,
+ element_nullable: Boolean = true,
+ comment: Option[String] = None,
+ default: Option[String] = None
+)
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
index da0ddd3a156f..254eda69e86e 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala
@@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends
SharedThriftServer {
val sessionHandle = client.openSession(user, "")
val infoValue = client.getInfo(sessionHandle,
GetInfoType.CLI_ODBC_KEYWORDS)
// scalastyle:off line.size.limit
- assert(infoValue.getStringValue ==
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURR
[...]
+ assert(infoValue.getStringValue ==
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURR
[...]
// scalastyle:on line.size.limit
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]