This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 342055b2ad04  [SPARK-51307][SQL] locationUri in CatalogStorageFormat shall be decoded for display
342055b2ad04 is described below

commit 342055b2ad04afa1198793c3cbe79dd82500e44d
Author: Kent Yao <y...@apache.org>
AuthorDate: Tue Mar 4 10:24:41 2025 -0800

    [SPARK-51307][SQL] locationUri in CatalogStorageFormat shall be decoded for display

    ### What changes were proposed in this pull request?

    This PR uses CatalogUtils.URIToString instead of URI.toString to decode the location URI for display.

    ### Why are the changes needed?

    For partition specs such as test1=X'16', test3=timestamp'2018-11-17 13:33:33', the stored path escapes the special characters and becomes `test1=%16/test3=2018-11-17 13%3A33%3A33`. When that path string is then resolved into a URI object, the fragment is percent-encoded a second time, yielding `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so we need to decode `%25` back to `%` before displaying the location to users. A standalone sketch of this re-encoding appears after the diff below.

    ### Does this PR introduce _any_ user-facing change?

    Yes, DESC TABLE will no longer show doubly encoded paths.

    ### How was this patch tested?

    New tests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #50074 from yaooqinn/SPARK-51307.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit eb7144396286f012317647c8979ee0e6e5f75868)
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/catalyst/catalog/interface.scala     |  2 +-
 .../sql-tests/analyzer-results/describe.sql.out    | 28 ++++++++++++
 .../test/resources/sql-tests/inputs/describe.sql   |  8 ++++
 .../resources/sql-tests/results/describe.sql.out   | 50 ++++++++++++++++++++++
 .../org/apache/spark/sql/SQLQueryTestHelper.scala  |  3 ++
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 1cb3520d4e26..5c4e9d4bddc5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -156,7 +156,7 @@ case class CatalogStorageFormat(
 
   def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
     val map = mutable.LinkedHashMap[String, JValue]()
-    locationUri.foreach(l => map += ("Location" -> JString(l.toString)))
+    locationUri.foreach(l => map += ("Location" -> JString(CatalogUtils.URIToString(l))))
     serde.foreach(s => map += ("Serde Library" -> JString(s)))
     inputFormat.foreach(format => map += ("InputFormat" -> JString(format)))
     outputFormat.foreach(format => map += ("OutputFormat" -> JString(format)))
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index c13dd5302387..2f7237663b64 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -393,6 +393,27 @@ DESC FORMATTED e
 DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]
 
 
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+   +- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+      +- OneRowRelation
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query analysis
+DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query analysis
+DescribeRelationJsonCommand [B=SPARK, C=2018-11-17 13:33:33], true, [json_metadata#x]
++- ResolvedTable V2SessionCatalog(spark_catalog), default.f, V1Table(default.f), [A#x, B#x, C#x]
+
+
 -- !query
 DROP VIEW temp_v
 -- !query analysis
@@ -430,3 +451,10 @@ DROP TABLE e
 -- !query analysis
 DropTable false, false
 +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e
+
+
+-- !query
+DROP TABLE f
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index dd7f7c40ed52..dbe5bc840bce 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -122,6 +122,12 @@ DESC TABLE EXTENDED e;
 
 DESC FORMATTED e;
 
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
 -- DROP TEST TABLES/VIEWS
 DROP VIEW temp_v;
 
@@ -135,3 +141,5 @@ DROP TABLE t;
 DROP TABLE d;
 
 DROP TABLE e;
+
+DROP TABLE f;
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index da53ece198da..d5a3dd50dc7e 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -890,6 +890,48 @@ a string CONCAT('a\n b\n ', 'c\n d')
 b int 42
 
 
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+A string
+B binary
+C timestamp
+# Partition Information
+# col_name data_type comment
+B binary
+C timestamp
+
+# Detailed Partition Information
+Database default
+Table f
+Partition Values [B=SPARK, C=2018-11-17 13:33:33]
+Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
+Partition Parameters [numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]]
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+
+# Storage Information
+Location [not included in comparison]/{warehouse_dir}/f
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query schema
+struct<json_metadata:string>
+-- !query output
+{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15", [...] + + -- !query DROP VIEW temp_v -- !query schema @@ -936,3 +978,11 @@ DROP TABLE e struct<> -- !query output + + +-- !query +DROP TABLE f +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 04f274e4af59..7cc556857774 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -66,6 +66,9 @@ trait SQLQueryTestHelper extends Logging { s""""location": "$notIncludedMsg/{warehouse_dir}/""") .replaceAll(s""""created_by":".*?"""", s""""created_by $notIncludedMsg":"None"""") .replaceAll(s""""created_time":".*?"""", s""""created_time $notIncludedMsg":"None"""") + .replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg") + .replaceAll(s""""transient_lastDdlTime":"\\d+"""", + s""""transient_lastDdlTime $notIncludedMsg":"None"""") .replaceAll(s""""last_access":".*?"""", s""""last_access $notIncludedMsg":"None"""") .replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""") .replaceAll(s""""partition_statistics":"\\d+"""", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org