This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 915f0cc [SPARK-38236][SQL][3.2][3.1] Check if table location is
absolute by "new Path(locationUri).isAbsolute" in create/alter table
915f0cc is described below
commit 915f0cc5a594da567b9e83fba05a3eb7897c739c
Author: Bo Zhang <[email protected]>
AuthorDate: Thu Feb 24 15:07:38 2022 +0800
[SPARK-38236][SQL][3.2][3.1] Check if table location is absolute by "new
Path(locationUri).isAbsolute" in create/alter table
### What changes were proposed in this pull request?
After https://github.com/apache/spark/pull/28527, we changed to creating tables
under the database location when the table location is relative. However the
criterion used to determine whether a table location is relative or absolute is
`URI.isAbsolute`, which basically checks if the table location URI has a scheme
defined. So table URIs like `/table/path` are treated as relative and the
scheme and authority of the database location URI are used to create the table.
For example, when the database locat [...]
This change fixes that by treating the table location as absolute when its
path starts with a slash.
This also applies to alter table.
### Why are the changes needed?
This is to fix the behavior described above.
### Does this PR introduce _any_ user-facing change?
Yes. When users try to create/alter a table with a location that starts
with a slash but without a scheme defined, the table will be created
under/altered to the file system defined in `SessionCatalog.hadoopConf`,
instead of the one defined in the database location URI.
### How was this patch tested?
Updated unit tests.
Closes #35591 from bozhang2820/spark-31709-3.2.
Authored-by: Bo Zhang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/avro/AvroSuite.scala | 2 ++
.../spark/mllib/util/MLlibTestSparkContext.scala | 2 ++
.../sql/catalyst/catalog/SessionCatalog.scala | 2 ++
.../datasources/v2/V2SessionCatalogSuite.scala | 25 ++++++++++++++++------
4 files changed, 25 insertions(+), 6 deletions(-)
diff --git
a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index 2faa8e6..7ed9d78 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -68,6 +68,8 @@ abstract class AvroSuite
override protected def beforeAll(): Unit = {
super.beforeAll()
+ // initialize SessionCatalog here so it has a clean hadoopConf
+ spark.sessionState.catalog
spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024)
}
diff --git
a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
index 5eb128a..3a7040d 100644
---
a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
+++
b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala
@@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self:
Suite =>
.appName("MLlibUnitTest")
.getOrCreate()
sc = spark.sparkContext
+ // initialize SessionCatalog here so it has a clean hadoopConf
+ spark.sessionState.catalog
checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath,
"checkpoints").toString
sc.setCheckpointDir(checkpointDir)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index a4694a6..314997a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -393,6 +393,8 @@ class SessionCatalog(
private def makeQualifiedTablePath(locationUri: URI, database: String): URI
= {
if (locationUri.isAbsolute) {
locationUri
+ } else if (new Path(locationUri).isAbsolute) {
+ makeQualifiedPath(locationUri)
} else {
val dbName = formatDatabaseName(database)
val dbLocation =
makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
index 1a4f084..d689518 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
@@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter
import org.apache.spark.sql.AnalysisException
import
org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException,
NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier,
NamespaceChange, TableCatalog, TableChange, V1Table}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier,
NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType,
StringType, StructField, StructType, TimestampType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends
V2SessionCatalogBaseSuite {
super.beforeAll()
val catalog = newCatalog()
catalog.createNamespace(Array("db"), emptyProps)
- catalog.createNamespace(Array("db2"), emptyProps)
+ catalog.createNamespace(Array("db2"),
+ Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava)
catalog.createNamespace(Array("ns"), emptyProps)
catalog.createNamespace(Array("ns2"), emptyProps)
}
@@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends
V2SessionCatalogBaseSuite {
assert(t2.catalogTable.location ===
makeQualifiedPathWithWarehouse("db.db/relative/path"))
catalog.dropTable(testIdent)
- // absolute path
+ // absolute path without scheme
properties.put(TableCatalog.PROP_LOCATION, "/absolute/path")
val t3 = catalog.createTable(testIdent, schema, Array.empty,
properties).asInstanceOf[V1Table]
- assert(t3.catalogTable.location.toString === "file:/absolute/path")
+ assert(t3.catalogTable.location.toString === "file:///absolute/path")
+ catalog.dropTable(testIdent)
+
+ // absolute path with scheme
+ properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path")
+ val t4 = catalog.createTable(testIdent, schema, Array.empty,
properties).asInstanceOf[V1Table]
+ assert(t4.catalogTable.location.toString === "file:/absolute/path")
+ catalog.dropTable(testIdent)
}
test("tableExists") {
@@ -686,10 +694,15 @@ class V2SessionCatalogTableSuite extends
V2SessionCatalogBaseSuite {
TableChange.setProperty(TableCatalog.PROP_LOCATION,
"relative/path")).asInstanceOf[V1Table]
assert(t2.catalogTable.location ===
makeQualifiedPathWithWarehouse("db.db/relative/path"))
- // absolute path
+ // absolute path without scheme
val t3 = catalog.alterTable(testIdent,
TableChange.setProperty(TableCatalog.PROP_LOCATION,
"/absolute/path")).asInstanceOf[V1Table]
- assert(t3.catalogTable.location.toString === "file:/absolute/path")
+ assert(t3.catalogTable.location.toString === "file:///absolute/path")
+
+ // absolute path with scheme
+ val t4 = catalog.alterTable(testIdent, TableChange.setProperty(
+ TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table]
+ assert(t4.catalogTable.location.toString === "file:/absolute/path")
}
test("dropTable") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]