This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 22ef681 [SPARK-33803][SQL] Sort table properties by key in DESCRIBE
TABLE command
22ef681 is described below
commit 22ef681578f10c0539db15732b5f2931b2cef84f
Author: HyukjinKwon <[email protected]>
AuthorDate: Wed Dec 16 13:42:30 2020 +0000
[SPARK-33803][SQL] Sort table properties by key in DESCRIBE TABLE command
### What changes were proposed in this pull request?
This PR proposes to sort table properties by key in the DESCRIBE TABLE
command. This is consistent with the DSv2 command as well:
https://github.com/apache/spark/blob/e3058ba17cb4512537953eb4ded884e24ee93ba2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala#L63
This PR also fixes the test case in the Scala 2.13 build, where the table
properties are iterated in a different order in the map.
### Why are the changes needed?
To keep the output deterministic and readable, and to fix the tests in the
Scala 2.13 build.
See
https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-maven-hadoop-3.2-scala-2.13/49/testReport/junit/org.apache.spark.sql/SQLQueryTestSuite/describe_sql/
```
describe.sql
Expected "...spark_catalog, view.[query.out.col.2=c,
view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]]",
but got "...spark_catalog, view.[catalogAndNamespace.part.1=default,
view.query.out.col.2=c, view.referredTempFunctionsNames=[]]]" Result did not
match for query #29
DESC FORMATTED v
```
### Does this PR introduce _any_ user-facing change?
Yes, it will change the text output from `DESCRIBE [EXTENDED|FORMATTED]
table_name`.
Now the table properties are sorted by their keys.
### How was this patch tested?
Related unit tests were updated accordingly.
Closes #30799 from HyukjinKwon/SPARK-33803.
Authored-by: HyukjinKwon <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 7845865b8d5c03a4daf82588be0ff2ebb90152a7)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/catalog/interface.scala | 3 ++-
.../resources/sql-tests/results/describe.sql.out | 8 +++----
.../results/postgreSQL/create_view.sql.out | 28 +++++++++++-----------
3 files changed, 20 insertions(+), 19 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6743b05..9876ee3 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -388,7 +388,8 @@ case class CatalogTable(
def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
val map = new mutable.LinkedHashMap[String, String]()
- val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[",
", ", "]")
+ val tableProperties = properties.toSeq.sortBy(_._1)
+ .map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")
val partitionColumns =
partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")
val lastAccess = {
if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 2674d05..105a696 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -130,7 +130,7 @@ Num Buckets 2
Bucket Columns [`a`]
Sort Columns [`b`]
Comment table_comment
-Table Properties [t=test, e=3]
+Table Properties [e=3, t=test]
Location [not included in comparison]/{warehouse_dir}/t
Storage Properties [a=1, b=2]
Partition Provider Catalog
@@ -162,7 +162,7 @@ Num Buckets 2
Bucket Columns [`a`]
Sort Columns [`b`]
Comment table_comment
-Table Properties [t=test, e=3]
+Table Properties [e=3, t=test]
Location [not included in comparison]/{warehouse_dir}/t
Storage Properties [a=1, b=2]
Partition Provider Catalog
@@ -477,7 +477,7 @@ View Text SELECT * FROM t
View Original Text SELECT * FROM t
View Catalog and Namespace spark_catalog.default
View Query Output Columns [a, b, c, d]
-Table Properties [view.query.out.col.3=d,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=a,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog,
view.query.out.col.2=c, view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=default]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=default, view.query.out.col.0=a,
view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d,
view.query.out.numCols=4, view.referredTempFunctionsNames=[],
view.referredTempViewNames=[]]
-- !query
@@ -501,7 +501,7 @@ View Text SELECT * FROM t
View Original Text SELECT * FROM t
View Catalog and Namespace spark_catalog.default
View Query Output Columns [a, b, c, d]
-Table Properties [view.query.out.col.3=d,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=a,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog,
view.query.out.col.2=c, view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=default]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=default, view.query.out.col.0=a,
view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d,
view.query.out.numCols=4, view.referredTempFunctionsNames=[],
view.referredTempViewNames=[]]
-- !query
diff --git
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
index 7d331f2..3a431cb 100644
---
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
@@ -257,7 +257,7 @@ View Text SELECT * FROM base_table
View Original Text SELECT * FROM base_table
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -313,7 +313,7 @@ View Text SELECT * FROM base_table
View Original Text SELECT * FROM base_table
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS
t2_a
WHERE t1.id = t2.id
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [t1_a, t2_a]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=t1_a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=t1_a,
view.query.out.col.1=t2_a, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE
id IN (SELECT id FROM base_t
View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM
base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM
base_table t1, (SELECT * FROM base_
View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM
base_table2) t2
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [id, a]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=id, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=id,
view.query.out.col.1=a, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE
EXISTS (SELECT 1 FROM base_t
View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM
base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE
NOT EXISTS (SELECT 1 FROM ba
View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1
FROM base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE
EXISTS (SELECT 1)
View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=temp_view_test]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a,
view.query.out.col.1=id, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2
View Original Text SELECT * FROM t1 CROSS JOIN t2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
-Table Properties [view.query.out.col.3=value,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=num,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name,
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num,
view.query.out.col.1=name, view.query.out.col.2=num2,
view.query.out.col.3=value, view.query.out.numCols=4,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2
ON t1.num = t2.num2
View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
-Table Properties [view.query.out.col.3=value,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=num,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name,
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num,
view.query.out.col.1=name, view.query.out.col.2=num2,
view.query.out.col.3=value, view.query.out.numCols=4,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2
ON t1.num = t2.num2
View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
-Table Properties [view.query.out.col.3=value,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=num,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name,
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num,
view.query.out.col.1=name, view.query.out.col.2=num2,
view.query.out.col.3=value, view.query.out.numCols=4,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2
ON t1.num = t2.num2 AND t2.va
View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND
t2.value = 'xxx'
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
-Table Properties [view.query.out.col.3=value,
view.catalogAndNamespace.numParts=2, view.query.out.col.0=num,
view.query.out.numCols=4, view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name,
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num,
view.query.out.col.1=name, view.query.out.col.2=num2,
view.query.out.col.3=value, view.query.out.numCols=4,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM
tbl3 WHERE f = 2)
AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [a, b]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a,
view.query.out.col.1=b, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
@@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h =
tbl3.f)
AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j)
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [a, b]
-Table Properties [view.catalogAndNamespace.numParts=2,
view.query.out.col.0=a, view.query.out.numCols=2,
view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true,
view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog,
view.referredTempFunctionsNames=[],
view.catalogAndNamespace.part.1=testviewschm2]
+Table Properties [view.catalogAndNamespace.numParts=2,
view.catalogAndNamespace.part.0=spark_catalog,
view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a,
view.query.out.col.1=b, view.query.out.numCols=2,
view.referredTempFunctionsNames=[], view.referredTempViewNames=[],
view.sqlConfig.spark.sql.ansi.enabled=true]
-- !query
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]