This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 915f0cc  [SPARK-38236][SQL][3.2][3.1] Check if table location is 
absolute by "new Path(locationUri).isAbsolute" in create/alter table
915f0cc is described below

commit 915f0cc5a594da567b9e83fba05a3eb7897c739c
Author: Bo Zhang <bo.zh...@databricks.com>
AuthorDate: Thu Feb 24 15:07:38 2022 +0800

    [SPARK-38236][SQL][3.2][3.1] Check if table location is absolute by "new 
Path(locationUri).isAbsolute" in create/alter table
    
    ### What changes were proposed in this pull request?
    After https://github.com/apache/spark/pull/28527, we change to create table 
under the database location when the table location is relative. However the 
criterion used to determine whether a table location is relative or absolute is 
`URI.isAbsolute`, which basically checks if the table location URI has a scheme 
defined. So table URIs like `/table/path` are treated as relative and the 
scheme and authority of the database location URI are used to create the table. 
For example, when the database locat [...]
    
    This change fixes that by treating the table location as absolute when the 
first character of its path is a slash.
    
    This also applies to alter table.
    
    ### Why are the changes needed?
    This is to fix the behavior described above.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. When users try to create or alter a table with a location that starts 
with a slash but has no scheme defined, the table will be created under (or 
altered to) the file system defined in `SessionCatalog.hadoopConf`, 
instead of the one defined in the database location URI.
    
    ### How was this patch tested?
    Updated unit tests.
    
    Closes #35591 from bozhang2820/spark-31709-3.2.
    
    Authored-by: Bo Zhang <bo.zh...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/avro/AvroSuite.scala      |  2 ++
 .../spark/mllib/util/MLlibTestSparkContext.scala   |  2 ++
 .../sql/catalyst/catalog/SessionCatalog.scala      |  2 ++
 .../datasources/v2/V2SessionCatalogSuite.scala     | 25 ++++++++++++++++------
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git 
a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala 
b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index 2faa8e6..7ed9d78 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -68,6 +68,8 @@ abstract class AvroSuite
 
   override protected def beforeAll(): Unit = {
     super.beforeAll()
+    // initialize SessionCatalog here so it has a clean hadoopConf
+    spark.sessionState.catalog
     spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024)
   }
 
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
index 5eb128a..3a7040d 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
@@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self: 
Suite =>
       .appName("MLlibUnitTest")
       .getOrCreate()
     sc = spark.sparkContext
+    // initialize SessionCatalog here so it has a clean hadoopConf
+    spark.sessionState.catalog
 
     checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath, 
"checkpoints").toString
     sc.setCheckpointDir(checkpointDir)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index a4694a6..314997a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -393,6 +393,8 @@ class SessionCatalog(
   private def makeQualifiedTablePath(locationUri: URI, database: String): URI 
= {
     if (locationUri.isAbsolute) {
       locationUri
+    } else if (new Path(locationUri).isAbsolute) {
+      makeQualifiedPath(locationUri)
     } else {
       val dbName = formatDatabaseName(database)
       val dbLocation = 
makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri)
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
index 1a4f084..d689518 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
@@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter
 import org.apache.spark.sql.AnalysisException
 import 
org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, 
NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, 
NamespaceChange, TableCatalog, TableChange, V1Table}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, 
NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table}
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, 
StringType, StructField, StructType, TimestampType}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends 
V2SessionCatalogBaseSuite {
     super.beforeAll()
     val catalog = newCatalog()
     catalog.createNamespace(Array("db"), emptyProps)
-    catalog.createNamespace(Array("db2"), emptyProps)
+    catalog.createNamespace(Array("db2"),
+      Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava)
     catalog.createNamespace(Array("ns"), emptyProps)
     catalog.createNamespace(Array("ns2"), emptyProps)
   }
@@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends 
V2SessionCatalogBaseSuite {
     assert(t2.catalogTable.location === 
makeQualifiedPathWithWarehouse("db.db/relative/path"))
     catalog.dropTable(testIdent)
 
-    // absolute path
+    // absolute path without scheme
     properties.put(TableCatalog.PROP_LOCATION, "/absolute/path")
     val t3 = catalog.createTable(testIdent, schema, Array.empty, 
properties).asInstanceOf[V1Table]
-    assert(t3.catalogTable.location.toString === "file:/absolute/path")
+    assert(t3.catalogTable.location.toString === "file:///absolute/path")
+    catalog.dropTable(testIdent)
+
+    // absolute path with scheme
+    properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path")
+    val t4 = catalog.createTable(testIdent, schema, Array.empty, 
properties).asInstanceOf[V1Table]
+    assert(t4.catalogTable.location.toString === "file:/absolute/path")
+    catalog.dropTable(testIdent)
   }
 
   test("tableExists") {
@@ -686,10 +694,15 @@ class V2SessionCatalogTableSuite extends 
V2SessionCatalogBaseSuite {
       TableChange.setProperty(TableCatalog.PROP_LOCATION, 
"relative/path")).asInstanceOf[V1Table]
     assert(t2.catalogTable.location === 
makeQualifiedPathWithWarehouse("db.db/relative/path"))
 
-    // absolute path
+    // absolute path without scheme
     val t3 = catalog.alterTable(testIdent,
       TableChange.setProperty(TableCatalog.PROP_LOCATION, 
"/absolute/path")).asInstanceOf[V1Table]
-    assert(t3.catalogTable.location.toString === "file:/absolute/path")
+    assert(t3.catalogTable.location.toString === "file:///absolute/path")
+
+    // absolute path with scheme
+    val t4 = catalog.alterTable(testIdent, TableChange.setProperty(
+      TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table]
+    assert(t4.catalogTable.location.toString === "file:/absolute/path")
   }
 
   test("dropTable") {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to