This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 4231d5824525 [SPARK-51307][SQL][3.5] locationUri in CatalogStorageFormat shall be decoded for display
4231d5824525 is described below

commit 4231d58245251a34ae80a38ea4bbf7d720caa439
Author: Kent Yao <y...@apache.org>
AuthorDate: Thu Mar 6 14:10:20 2025 +0800

    [SPARK-51307][SQL][3.5] locationUri in CatalogStorageFormat shall be decoded for display

    ### What changes were proposed in this pull request?

    This PR uses CatalogUtils.URIToString instead of URI.toString to decode the location URI.

    ### Why are the changes needed?

    For example, for partition specs like test1=X'16', test3=timestamp'2018-11-17 13:33:33', the stored
    path will include them as `test1=%16/test3=2018-11-17 13%3A33%3A33` because the special characters
    are escaped. Furthermore, when the whole path string is resolved to a URI object, this fragment is
    escaped a second time and becomes `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so `%25` must be
    decoded back to `%` before the location is displayed to users (see the sketch after this commit
    message).

    ### Does this PR introduce _any_ user-facing change?

    Yes, DESC TABLE will no longer show doubly encoded paths.

    ### How was this patch tested?

    New tests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #50164 from yaooqinn/SPARK-51307-35.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
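To make the double encoding concrete, here is a minimal sketch using only `java.net.URI`, not Spark's actual code path; the warehouse path and partition values are illustrative. Spark has already escaped `:` to `%3A` when writing the partition directory, so building a URI from that string escapes the `%` itself, and decoding the path component restores the on-disk form, which is the decoding step `CatalogUtils.URIToString` provides in this patch.

```scala
import java.net.URI

object DoubleEncodingDemo extends App {
  // Partition directory as written on disk: the ':' characters in the
  // timestamp value are already escaped to %3A by partition-path escaping.
  val onDisk = "/warehouse/f/B=SPARK/C=2018-11-17 13%3A33%3A33"

  // Building a URI from that string percent-encodes the '%' (and the space) again.
  val uri = new URI("file", null, onDisk, null)
  println(uri.toString)
  // file:/warehouse/f/B=SPARK/C=2018-11-17%2013%253A33%253A33  <- doubly encoded

  // Decoding the path component undoes one level of encoding and restores
  // the human-readable on-disk form.
  println(uri.getPath)
  // /warehouse/f/B=SPARK/C=2018-11-17 13%3A33%3A33
}
```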
---
 .../spark/sql/catalyst/catalog/interface.scala     |  2 +-
 .../sql-tests/analyzer-results/describe.sql.out    | 67 +++++++++++++++-----
 .../test/resources/sql-tests/inputs/describe.sql   | 12 ++++
 .../resources/sql-tests/results/describe.sql.out   | 74 ++++++++++++++++++++++
 .../org/apache/spark/sql/SQLQueryTestHelper.scala  |  3 +
 5 files changed, 141 insertions(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6b72500f3f67..63b01e61fb8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -76,7 +76,7 @@ case class CatalogStorageFormat(
 
   def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
     val map = new mutable.LinkedHashMap[String, String]()
-    locationUri.foreach(l => map.put("Location", l.toString))
+    locationUri.foreach(l => map.put("Location", CatalogUtils.URIToString(l)))
     serde.foreach(map.put("Serde Library", _))
     inputFormat.foreach(map.put("InputFormat", _))
     outputFormat.foreach(map.put("OutputFormat", _))
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
index b9fe5c1d74c1..69a83b08fbc5 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out
@@ -318,14 +318,7 @@ DropTableCommand `spark_catalog`.`default`.`v`, false, true, false
 -- !query
 CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`d`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`d`, false
 
 
 -- !query
@@ -355,14 +348,7 @@ DescribeTableCommand `spark_catalog`.`default`.`d`, true, [col_name#x, data_type
 -- !query
 CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`e`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`e`, false
 
 
 -- !query
@@ -387,3 +373,52 @@ DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type
 DESC FORMATTED e
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+   +- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+      +- OneRowRelation
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query analysis
+DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.d
+
+
+-- !query
+DROP TABLE e
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e
+
+
+-- !query
+DROP TABLE f
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index b37931456d00..c784bc9fb74d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -119,3 +119,15 @@ DESC EXTENDED e;
 DESC TABLE EXTENDED e;
 
 DESC FORMATTED e;
+
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
+DROP TABLE d;
+
+DROP TABLE e;
+
+DROP TABLE f;
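The new DESC FORMATTED test below expects the partition location to end in `C=2018-11-17 13%3A33%3A33`, i.e. encoded exactly once. That `%3A` comes from Spark's partition-value escaping; the following toy escaper is a hedged illustration of the shape of that encoding, not Spark's real implementation (the full character set lives in `ExternalCatalogUtils.escapePathName`):

```scala
object EscapeDemo extends App {
  // Toy partition-value escaper: handles only ':' and '%', whereas Spark's
  // ExternalCatalogUtils.escapePathName escapes a much larger character set.
  // '%' itself must be escaped so the encoding stays reversible.
  def escapePathValue(value: String): String = value.flatMap {
    case c @ (':' | '%') => f"%%${c.toInt}%02X"
    case c               => c.toString
  }

  println(escapePathValue("2018-11-17 13:33:33")) // 2018-11-17 13%3A33%3A33
}
```

A directory name built from this value, once turned into a java.net.URI as in the earlier sketch, is where the doubly encoded `%253A` in the old DESC output came from.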
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index 10c27ea0cc79..824a0721ea75 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -864,3 +864,77 @@ Location [not included in comparison]/{warehouse_dir}/e
 # Column Default Values
 a                      string                  CONCAT('a\n b\n ', 'c\n d')
 b                      int                     42
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+A                      string
+B                      binary
+C                      timestamp
+# Partition Information
+# col_name             data_type               comment
+B                      binary
+C                      timestamp
+
+# Detailed Partition Information
+Database               default
+Table                  f
+Partition Values       [B=SPARK, C=2018-11-17 13:33:33]
+Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
+Partition Parameters   {numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]}
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+
+# Storage Information
+Location [not included in comparison]/{warehouse_dir}/f
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE e
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE f
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index d8956961440d..7c9ef2e93f9d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -48,6 +48,9 @@ trait SQLQueryTestHelper extends Logging {
       .replaceAll(s"file:[^\\s,]*$clsName", s"file:$notIncludedMsg/{warehouse_dir}")
       .replaceAll("Created By.*", s"Created By $notIncludedMsg")
       .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
+      .replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")
+      .replaceAll(s""""transient_lastDdlTime":"\\d+"""",
+        s""""transient_lastDdlTime $notIncludedMsg":"None"""")
       .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")
       .replaceAll("Owner\t.*", s"Owner\t$notIncludedMsg")
       .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg")
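Finally, the SQLQueryTestHelper change masks the volatile `transient_lastDdlTime` table property so the new golden output stays stable across runs. Below is a small standalone illustration of the first new rule; the sample property value is invented for the example, and `notIncludedMsg` mirrors the `[not included in comparison]` placeholder visible in the golden files above:

```scala
object NormalizeDemo extends App {
  val notIncludedMsg = "[not included in comparison]"

  // A golden-file line whose property value depends on the wall clock
  // (the numeric value here is made up).
  val raw = "Partition Parameters\t{numFiles=1, totalSize=15, transient_lastDdlTime=1741240220}"

  // Same regex as the first rule added to SQLQueryTestHelper.
  val normalized = raw.replaceAll(
    "transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")

  println(normalized)
  // Partition Parameters	{numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]}
}
```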