This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 915f0cc [SPARK-38236][SQL][3.2][3.1] Check if table location is absolute by "new Path(locationUri).isAbsolute" in create/alter table 915f0cc is described below commit 915f0cc5a594da567b9e83fba05a3eb7897c739c Author: Bo Zhang <bo.zh...@databricks.com> AuthorDate: Thu Feb 24 15:07:38 2022 +0800 [SPARK-38236][SQL][3.2][3.1] Check if table location is absolute by "new Path(locationUri).isAbsolute" in create/alter table ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/28527, we change to create table under the database location when the table location is relative. However the criteria to determine if a table location is relative/absolute is `URI.isAbsolute`, which basically checks if the table location URI has a scheme defined. So table URIs like `/table/path` are treated as relative and the scheme and authority of the database location URI are used to create the table. For example, when the database locat [...] This change fixes that by treating table location as absolute when the first letter of its path is slash. This also applies to alter table. ### Why are the changes needed? This is to fix the behavior described above. ### Does this PR introduce _any_ user-facing change? Yes. When users try to create/alter a table with a location that starts with a slash but without a scheme defined, the table will be created under/altered to the file system defined in `SessionCatalog.hadoopConf`, instead of the one defined in the database location URI. ### How was this patch tested? Updated unit tests. Closes #35591 from bozhang2820/spark-31709-3.2. Authored-by: Bo Zhang <bo.zh...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/avro/AvroSuite.scala | 2 ++ .../spark/mllib/util/MLlibTestSparkContext.scala | 2 ++ .../sql/catalyst/catalog/SessionCatalog.scala | 2 ++ .../datasources/v2/V2SessionCatalogSuite.scala | 25 ++++++++++++++++------ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 2faa8e6..7ed9d78 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -68,6 +68,8 @@ abstract class AvroSuite override protected def beforeAll(): Unit = { super.beforeAll() + // initialize SessionCatalog here so it has a clean hadoopConf + spark.sessionState.catalog spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 5eb128a..3a7040d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => .appName("MLlibUnitTest") .getOrCreate() sc = spark.sparkContext + // initialize SessionCatalog here so it has a clean hadoopConf + spark.sessionState.catalog checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString sc.setCheckpointDir(checkpointDir) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index a4694a6..314997a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -393,6 +393,8 @@ class SessionCatalog( private def makeQualifiedTablePath(locationUri: URI, database: String): URI = { if (locationUri.isAbsolute) { locationUri + } else if (new Path(locationUri).isAbsolute) { + makeQualifiedPath(locationUri) } else { val dbName = formatDatabaseName(database) val dbLocation = makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 1a4f084..d689518 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { super.beforeAll() val catalog = newCatalog() catalog.createNamespace(Array("db"), emptyProps) - catalog.createNamespace(Array("db2"), emptyProps) + catalog.createNamespace(Array("db2"), + Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava) catalog.createNamespace(Array("ns"), emptyProps) catalog.createNamespace(Array("ns2"), emptyProps) } @@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path")) catalog.dropTable(testIdent) - // absolute path + // absolute path without scheme properties.put(TableCatalog.PROP_LOCATION, "/absolute/path") val t3 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table] - assert(t3.catalogTable.location.toString === "file:/absolute/path") + assert(t3.catalogTable.location.toString === "file:///absolute/path") + catalog.dropTable(testIdent) + + // absolute path with scheme + properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path") + val t4 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table] + assert(t4.catalogTable.location.toString === "file:/absolute/path") + catalog.dropTable(testIdent) } test("tableExists") { @@ -686,10 +694,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { TableChange.setProperty(TableCatalog.PROP_LOCATION, "relative/path")).asInstanceOf[V1Table] assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path")) - // absolute path + // absolute path without scheme val t3 = catalog.alterTable(testIdent, TableChange.setProperty(TableCatalog.PROP_LOCATION, "/absolute/path")).asInstanceOf[V1Table] - assert(t3.catalogTable.location.toString === "file:/absolute/path") + assert(t3.catalogTable.location.toString === "file:///absolute/path") + + // absolute path with scheme + val t4 = catalog.alterTable(testIdent, TableChange.setProperty( + TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table] + assert(t4.catalogTable.location.toString === "file:/absolute/path") } test("dropTable") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org