This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 342055b2ad04  [SPARK-51307][SQL] locationUri in CatalogStorageFormat shall be decoded for display
342055b2ad04 is described below

commit 342055b2ad04afa1198793c3cbe79dd82500e44d
Author: Kent Yao <y...@apache.org>
AuthorDate: Tue Mar 4 10:24:41 2025 -0800

    [SPARK-51307][SQL] locationUri in CatalogStorageFormat shall be decoded for display

    ### What changes were proposed in this pull request?

    This PR uses CatalogUtils.URIToString instead of URI.toString to decode the location URI for display.

    ### Why are the changes needed?

    For partition specs such as test1=X'16', test3=timestamp'2018-11-17 13:33:33', the stored path escapes the special characters and becomes `test1=%16/test3=2018-11-17 13%3A33%3A33`. When that path string is then resolved into a URI object, the fragment is percent-encoded a second time, yielding `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so we need to decode `%25` back to `%` before displaying the location to users. A standalone sketch of this re-encoding appears after the diff below.

    ### Does this PR introduce _any_ user-facing change?

    Yes, DESC TABLE will no longer show doubly encoded paths.

    ### How was this patch tested?

    New tests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #50074 from yaooqinn/SPARK-51307.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit eb7144396286f012317647c8979ee0e6e5f75868)
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/catalyst/catalog/interface.scala     |  2 +-
 .../sql-tests/analyzer-results/describe.sql.out    | 28 ++++++++++++
 .../test/resources/sql-tests/inputs/describe.sql   |  8 ++++
 .../resources/sql-tests/results/describe.sql.out   | 50 ++++++++++++++++++++++
 .../org/apache/spark/sql/SQLQueryTestHelper.scala  |  3 ++
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 1cb3520d4e26..5c4e9d4bddc5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -156,7 +156,7 @@ case class CatalogStorageFormat(
 
   def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = {
     val map = mutable.LinkedHashMap[String, JValue]()
-    locationUri.foreach(l => map += ("Location" -> JString(l.toString)))
+    locationUri.foreach(l => map += ("Location" -> JString(CatalogUtils.URIToString(l))))
     serde.foreach(s => map += ("Serde Library" -> JString(s)))
     inputFormat.foreach(format => map += ("InputFormat" -> JString(format)))
     outputFormat.foreach(format => map += ("OutputFormat" -> JString(format)))
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index c13dd5302387..2f7237663b64 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -393,6 +393,27 @@ DESC FORMATTED e
 DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]
 
 
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+   +- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+      +- OneRowRelation
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query analysis
+DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query analysis
+DescribeRelationJsonCommand [B=SPARK, C=2018-11-17 13:33:33], true, [json_metadata#x]
++- ResolvedTable V2SessionCatalog(spark_catalog), default.f, V1Table(default.f), [A#x, B#x, C#x]
+
+
 -- !query
 DROP VIEW temp_v
 -- !query analysis
@@ -430,3 +451,10 @@ DROP TABLE e
 -- !query analysis
 DropTable false, false
 +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e
+
+
+-- !query
+DROP TABLE f
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index dd7f7c40ed52..dbe5bc840bce 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -122,6 +122,12 @@ DESC TABLE EXTENDED e;
 
 DESC FORMATTED e;
 
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
 -- DROP TEST TABLES/VIEWS
 DROP VIEW temp_v;
 
@@ -135,3 +141,5 @@ DROP TABLE t;
 DROP TABLE d;
 
 DROP TABLE e;
+
+DROP TABLE f;
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index da53ece198da..d5a3dd50dc7e 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -890,6 +890,48 @@ a string CONCAT('a\n b\n ', 'c\n d')
 b int 42
 
 
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+A string
+B binary
+C timestamp
+# Partition Information
+# col_name data_type comment
+B binary
+C timestamp
+
+# Detailed Partition Information
+Database default
+Table f
+Partition Values [B=SPARK, C=2018-11-17 13:33:33]
+Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
+Partition Parameters [numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]]
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+
+# Storage Information
+Location [not included in comparison]/{warehouse_dir}/f
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query schema
+struct<json_metadata:string>
+-- !query output
+{"table_name":"f","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"A","type":{"name":"string"},"nullable":true},{"name":"B","type":{"name":"binary"},"nullable":true},{"name":"C","type":{"name":"timestamp_ltz"},"nullable":true}],"partition_values":{"B":"SPARK","C":"2018-11-17 13:33:33"},"location":"file:[not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33","partition_parameters":{"numFiles":"1","totalSize":"15", [...] + + -- !query DROP VIEW temp_v -- !query schema @@ -936,3 +978,11 @@ DROP TABLE e struct<> -- !query output + + +-- !query +DROP TABLE f +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 04f274e4af59..7cc556857774 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -66,6 +66,9 @@ trait SQLQueryTestHelper extends Logging { s""""location": "$notIncludedMsg/{warehouse_dir}/""") .replaceAll(s""""created_by":".*?"""", s""""created_by $notIncludedMsg":"None"""") .replaceAll(s""""created_time":".*?"""", s""""created_time $notIncludedMsg":"None"""") + .replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg") + .replaceAll(s""""transient_lastDdlTime":"\\d+"""", + s""""transient_lastDdlTime $notIncludedMsg":"None"""") .replaceAll(s""""last_access":".*?"""", s""""last_access $notIncludedMsg":"None"""") .replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""") .replaceAll(s""""partition_statistics":"\\d+"""", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org