This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 4231d5824525 [SPARK-51307][SQL][3.5] locationUri in CatalogStorageFormat shall be decoded for display
4231d5824525 is described below

commit 4231d58245251a34ae80a38ea4bbf7d720caa439
Author: Kent Yao <y...@apache.org>
AuthorDate: Thu Mar 6 14:10:20 2025 +0800

    [SPARK-51307][SQL][3.5] locationUri in CatalogStorageFormat shall be decoded for display

    ### What changes were proposed in this pull request?

    This PR uses CatalogUtils.URIToString instead of URI.toString to decode the location URI.

    ### Why are the changes needed?

    For example, for partition specs like test1=X'16', test3=timestamp'2018-11-17 13:33:33', the stored
    path will include them as `test1=%16/test3=2018-11-17 13%3A33%3A33` because the special characters
    are escaped. Furthermore, when the whole path string is resolved to a URI object, this fragment is
    escaped a second time and becomes `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so `%25` must be
    decoded back to `%` before the location is displayed to users (see the sketch after this commit
    message).

    ### Does this PR introduce _any_ user-facing change?

    Yes, DESC TABLE will no longer show doubly encoded paths.

    ### How was this patch tested?

    New tests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #50164 from yaooqinn/SPARK-51307-35.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
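To make the double encoding concrete, here is a minimal sketch using only `java.net.URI`, not Spark's actual code path; the warehouse path and partition values are illustrative. Spark has already escaped `:` to `%3A` when writing the partition directory, so building a URI from that string escapes the `%` itself, and decoding the path component restores the on-disk form, which is the decoding step `CatalogUtils.URIToString` provides in this patch.

```scala
import java.net.URI

object DoubleEncodingDemo extends App {
  // Partition directory as written on disk: the ':' characters in the
  // timestamp value are already escaped to %3A by partition-path escaping.
  val onDisk = "/warehouse/f/B=SPARK/C=2018-11-17 13%3A33%3A33"

  // Building a URI from that string percent-encodes the '%' (and the space) again.
  val uri = new URI("file", null, onDisk, null)
  println(uri.toString)
  // file:/warehouse/f/B=SPARK/C=2018-11-17%2013%253A33%253A33  <- doubly encoded

  // Decoding the path component undoes one level of encoding and restores
  // the human-readable on-disk form.
  println(uri.getPath)
  // /warehouse/f/B=SPARK/C=2018-11-17 13%3A33%3A33
}
```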
---
 .../spark/sql/catalyst/catalog/interface.scala     |  2 +-
 .../sql-tests/analyzer-results/describe.sql.out    | 67 +++++++++++++++-----
 .../test/resources/sql-tests/inputs/describe.sql   | 12 ++++
 .../resources/sql-tests/results/describe.sql.out   | 74 ++++++++++++++++++++++
 .../org/apache/spark/sql/SQLQueryTestHelper.scala  |  3 +
 5 files changed, 141 insertions(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6b72500f3f67..63b01e61fb8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -76,7 +76,7 @@ case class CatalogStorageFormat(
 
   def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
     val map = new mutable.LinkedHashMap[String, String]()
-    locationUri.foreach(l => map.put("Location", l.toString))
+    locationUri.foreach(l => map.put("Location", CatalogUtils.URIToString(l)))
     serde.foreach(map.put("Serde Library", _))
     inputFormat.foreach(map.put("InputFormat", _))
     outputFormat.foreach(map.put("OutputFormat", _))
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index b9fe5c1d74c1..69a83b08fbc5 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -318,14 +318,7 @@ DropTableCommand `spark_catalog`.`default`.`v`, false, true, false
 -- !query
 CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`d`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`d`, false
 
 
 -- !query
@@ -355,14 +348,7 @@ DescribeTableCommand `spark_catalog`.`default`.`d`, true, [col_name#x, data_type
 -- !query
 CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`e`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`e`, false
 
 
 -- !query
@@ -387,3 +373,52 @@ DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type
 DESC FORMATTED e
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+   +- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+      +- OneRowRelation
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query analysis
+DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.d
+
+
+-- !query
+DROP TABLE e
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e
+
+
+-- !query
+DROP TABLE f
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index b37931456d00..c784bc9fb74d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -119,3 +119,15 @@ DESC EXTENDED e;
 DESC TABLE EXTENDED e;
 
 DESC FORMATTED e;
+
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
+DROP TABLE d;
+
+DROP TABLE e;
+
+DROP TABLE f;
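The new DESC FORMATTED test below expects the partition location to end in `C=2018-11-17 13%3A33%3A33`, i.e. encoded exactly once. That `%3A` comes from Spark's partition-value escaping; the following toy escaper is a hedged illustration of the shape of that encoding, not Spark's real implementation (the full character set lives in `ExternalCatalogUtils.escapePathName`):

```scala
object EscapeDemo extends App {
  // Toy partition-value escaper: handles only ':' and '%', whereas Spark's
  // ExternalCatalogUtils.escapePathName escapes a much larger character set.
  // '%' itself must be escaped so the encoding stays reversible.
  def escapePathValue(value: String): String = value.flatMap {
    case c @ (':' | '%') => f"%%${c.toInt}%02X"
    case c               => c.toString
  }

  println(escapePathValue("2018-11-17 13:33:33")) // 2018-11-17 13%3A33%3A33
}
```

A directory name built from this value, once turned into a java.net.URI as in the earlier sketch, is where the doubly encoded `%253A` in the old DESC output came from.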
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 10c27ea0cc79..824a0721ea75 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -864,3 +864,77 @@ Location [not included in comparison]/{warehouse_dir}/e
 # Column Default Values
 a                      string                  CONCAT('a\n b\n ', 'c\n d')
 b                      int                     42
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+A                      string
+B                      binary
+C                      timestamp
+# Partition Information
+# col_name             data_type               comment
+B                      binary
+C                      timestamp
+
+# Detailed Partition Information
+Database               default
+Table                  f
+Partition Values       [B=SPARK, C=2018-11-17 13:33:33]
+Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
+Partition Parameters   {numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]}
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+
+# Storage Information
+Location [not included in comparison]/{warehouse_dir}/f
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE e
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE f
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index d8956961440d..7c9ef2e93f9d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -48,6 +48,9 @@ trait SQLQueryTestHelper extends Logging {
       .replaceAll(s"file:[^\\s,]*$clsName", s"file:$notIncludedMsg/{warehouse_dir}")
       .replaceAll("Created By.*", s"Created By $notIncludedMsg")
       .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
+      .replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")
+      .replaceAll(s""""transient_lastDdlTime":"\\d+"""",
+        s""""transient_lastDdlTime $notIncludedMsg":"None"""")
       .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")
       .replaceAll("Owner\t.*", s"Owner\t$notIncludedMsg")
       .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg")
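Finally, the SQLQueryTestHelper change masks the volatile `transient_lastDdlTime` table property so the new golden output stays stable across runs. Below is a small standalone illustration of the first new rule; the sample property value is invented for the example, and `notIncludedMsg` mirrors the `[not included in comparison]` placeholder visible in the golden files above:

```scala
object NormalizeDemo extends App {
  val notIncludedMsg = "[not included in comparison]"

  // A golden-file line whose property value depends on the wall clock
  // (the numeric value here is made up).
  val raw = "Partition Parameters\t{numFiles=1, totalSize=15, transient_lastDdlTime=1741240220}"

  // Same regex as the first rule added to SQLQueryTestHelper.
  val normalized = raw.replaceAll(
    "transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")

  println(normalized)
  // Partition Parameters	{numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]}
}
```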