This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5ac76d9 [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default 5ac76d9 is described below commit 5ac76d9cb45d58eeb4253d50e90060a68c3e87cb Author: Terry Kim <yumin...@gmail.com> AuthorDate: Wed Oct 13 22:12:56 2021 +0300 [SPARK-36982] Migrate SHOW NAMESPACES to use V2 command by default ### What changes were proposed in this pull request? This PR proposes to use V2 commands as default as outlined in [SPARK-36588](https://issues.apache.org/jira/browse/SPARK-36588), and this PR migrates `SHOW NAMESPACES` to use v2 command by default. (Technically speaking, there is no v1 command for `SHOW NAMESPACES/DATABASES`, but this PR removes an extra check in `ResolveSessionCatalog` to handle session catalog.) ### Why are the changes needed? It's been a while since we introduced the v2 commands, and it seems reasonable to use v2 commands by default even for the session catalog, with a legacy config to fall back to the v1 commands. ### Does this PR introduce _any_ user-facing change? No, the user can use v1 command by setting `spark.sql.legacy.useV1Command` to `true`. ### How was this patch tested? Added unit tests. Closes #34255 from imback82/migrate_show_namespaces. Authored-by: Terry Kim <yumin...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/analysis/KeepLegacyOutputs.scala | 5 +++- .../org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../catalyst/analysis/ResolveSessionCatalog.scala | 16 +--------- .../execution/command/DDLCommandTestUtils.scala | 7 +++-- .../command/ShowNamespacesSuiteBase.scala | 6 ++++ .../execution/command/TestsV1AndV2Commands.scala} | 35 ++++++++++++---------- .../execution/command/v1/CommandSuiteBase.scala | 3 +- .../execution/command/v1/ShowNamespacesSuite.scala | 8 ++--- .../sql/execution/command/v1/ShowTablesSuite.scala | 27 ++--------------- .../execution/command/v2/CommandSuiteBase.scala | 3 +- .../hive/execution/command/CommandSuiteBase.scala | 3 +- .../execution/command/ShowNamespacesSuite.scala | 2 ++ .../hive/execution/command/ShowTablesSuite.scala | 2 +- 13 files changed, 49 insertions(+), 70 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala index baee2bd..3af5544 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowNamespaces, ShowTables} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND import org.apache.spark.sql.internal.SQLConf @@ -36,6 +36,9 @@ object KeepLegacyOutputs extends Rule[LogicalPlan] { assert(s.output.length == 3) val newOutput = s.output.head.withName("database") +: s.output.tail s.copy(output = newOutput) + case s: ShowNamespaces => + assert(s.output.length == 1) + s.copy(output = Seq(s.output.head.withName("databaseName"))) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1dc4a8e..f0e7731 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3364,7 +3364,7 @@ object SQLConf { buildConf("spark.sql.legacy.keepCommandOutputSchema") .internal() .doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " + - "unchanged, for v1 catalog and/or table.") + "unchanged.") .version("3.0.2") .booleanConf .createWithDefault(false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 2252225..c77be4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -121,14 +121,6 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case SetNamespaceLocation(DatabaseInSessionCatalog(db), location) => AlterDatabaseSetLocationCommand(db, location) - case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if isSessionCatalog(cata) => - if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) { - assert(output.length == 1) - s.copy(output = Seq(output.head.withName("databaseName"))) - } else { - s - } - case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) @@ -270,13 +262,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) DropDatabaseCommand(db, d.ifExists, d.cascade) case ShowTables(DatabaseInSessionCatalog(db), pattern, output) if conf.useV1Command => - val newOutput = if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) { - assert(output.length == 3) - output.head.withName("database") +: output.tail - } else { - output - } - ShowTablesCommand(Some(db), pattern, newOutput) + ShowTablesCommand(Some(db), pattern, output) case ShowTableExtended( DatabaseInSessionCatalog(db), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index f9e26f8..af3e92a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -39,7 +39,9 @@ import org.apache.spark.sql.test.SQLTestUtils */ trait DDLCommandTestUtils extends SQLTestUtils { // The version of the catalog under testing such as "V1", "V2", "Hive V1". - protected def version: String + protected def catalogVersion: String + // The version of the SQL command under testing such as "V1", "V2". + protected def commandVersion: String // Name of the command as SQL statement, for instance "SHOW PARTITIONS" protected def command: String // The catalog name which can be used in SQL statements under testing @@ -51,7 +53,8 @@ trait DDLCommandTestUtils extends SQLTestUtils { // the failed test in logs belongs to. override def test(testName: String, testTags: Tag*)(testFun: => Any) (implicit pos: Position): Unit = { - super.test(s"$command $version: " + testName, testTags: _*)(testFun) + val testNamePrefix = s"$command using $catalogVersion catalog $commandVersion command" + super.test(s"$testNamePrefix: $testName", testTags: _*)(testFun) } protected def withNamespaceAndTable(ns: String, tableName: String, cat: String = catalog) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala index 790489e..1b37444 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala @@ -128,4 +128,10 @@ trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils { spark.sessionState.catalogManager.reset() } } + + test("SPARK-34359: keep the legacy output schema") { + withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") { + assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName")) + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala similarity index 52% copy from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala copy to sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala index baee2bd..15976f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/KeepLegacyOutputs.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TestsV1AndV2Commands.scala @@ -15,27 +15,30 @@ * limitations under the License. */ -package org.apache.spark.sql.catalyst.analysis +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowTables} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND import org.apache.spark.sql.internal.SQLConf /** - * A rule for keeping the SQL command's legacy outputs. + * The trait that enables running a test for both v1 and v2 command. */ -object KeepLegacyOutputs extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = { - if (!conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) { - plan - } else { - plan.resolveOperatorsUpWithPruning( - _.containsPattern(COMMAND)) { - case s: ShowTables => - assert(s.output.length == 3) - val newOutput = s.output.head.withName("database") +: s.output.tail - s.copy(output = newOutput) +trait TestsV1AndV2Commands extends DDLCommandTestUtils { + private var _version: String = "" + override def commandVersion: String = _version + + // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off + // to test both V1 and V2 commands. + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + Seq(true, false).foreach { useV1Command => + _version = if (useV1Command) "V1" else "V2" + super.test(testName, testTags: _*) { + withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) { + testFun + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala index 80c552d..24f3ac3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.test.SharedSparkSession * settings for all unified datasource V1 and V2 test suites. */ trait CommandSuiteBase extends SharedSparkSession { - def version: String = "V1" // The prefix is added to test names + def catalogVersion: String = "V1" // The catalog version is added to test names + def commandVersion: String = "V1" // The command version is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING parquet" // The clause is used in creating tables under testing diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala index 52742a2..54c5d22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala @@ -38,15 +38,11 @@ trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase { }.getMessage assert(errMsg.contains("Namespace 'dummy' not found")) } - - test("SPARK-34359: keep the legacy output schema") { - withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") { - assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName")) - } - } } class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase { + override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES. + test("case sensitivity") { Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 23b9b54..68ad1c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -17,9 +17,6 @@ package org.apache.spark.sql.execution.command.v1 -import org.scalactic.source.Position -import org.scalatest.Tag - import org.apache.spark.sql.{AnalysisException, Row, SaveMode} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.execution.command @@ -33,28 +30,8 @@ import org.apache.spark.sql.internal.SQLConf * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowTablesSuite` * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowTablesSuite` */ -trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { +trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase with command.TestsV1AndV2Commands { override def defaultNamespace: Seq[String] = Seq("default") - var _version: String = "" - override def version: String = _version - - // Tests using V1 catalogs will run with `spark.sql.legacy.useV1Command` on and off - // to test both V1 and V2 commands. - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - Seq(true, false).foreach { useV1Command => - _version = if (useV1Command) { - "using V1 catalog with V1 command" - } else { - "using V1 catalog with V2 command" - } - super.test(testName, testTags: _*) { - withSQLConf(SQLConf.LEGACY_USE_V1_COMMAND.key -> useV1Command.toString) { - testFun - } - } - } - } private def withSourceViews(f: => Unit): Unit = { withTable("source", "source2") { @@ -165,7 +142,7 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { * The class contains tests for the `SHOW TABLES` command to check V1 In-Memory table catalog. */ class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase { - override def version: String = super[ShowTablesSuiteBase].version + override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion test("SPARK-33670: show partitions from a datasource table") { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala index bed04f4..1ff9e74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala @@ -30,7 +30,8 @@ import org.apache.spark.sql.test.SharedSparkSession * for all unified datasource V1 and V2 test suites. */ trait CommandSuiteBase extends SharedSparkSession { - def version: String = "V2" // The prefix is added to test names + def catalogVersion: String = "V2" // The catalog version is added to test names + def commandVersion: String = "V2" // The command version is added to test names def catalog: String = "test_catalog" // The default V2 catalog for testing def defaultUsing: String = "USING _" // The clause is used in creating v2 tables under testing diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala index 0709b12..b07c507 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala @@ -29,7 +29,8 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton * settings for all unified datasource V1 and V2 test suites. */ trait CommandSuiteBase extends TestHiveSingleton { - def version: String = "Hive V1" // The prefix is added to test names + def catalogVersion: String = "Hive V1" // The catalog version is added to test names + def commandVersion: String = "V1" // The command version is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING HIVE" // The clause is used in creating tables under testing diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala index 7aae000..015001f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala @@ -25,6 +25,8 @@ import org.apache.spark.sql.internal.SQLConf * V1 Hive external table catalog. */ class ShowNamespacesSuite extends v1.ShowNamespacesSuiteBase with CommandSuiteBase { + override def commandVersion: String = "V2" // There is only V2 variant of SHOW NAMESPACES. + test("case sensitivity") { Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala index 6050618..653a157 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.command.v1 * The class contains tests for the `SHOW TABLES` command to check V1 Hive external table catalog. */ class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase { - override def version: String = super[ShowTablesSuiteBase].version + override def commandVersion: String = super[ShowTablesSuiteBase].commandVersion test("hive client calls") { withNamespaceAndTable("ns", "tbl") { t => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org