This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new c29cf34bfc6 [SPARK-42823][SQL] `spark-sql` shell supports multipart
namespaces for initialization
c29cf34bfc6 is described below
commit c29cf34bfc694cd3d959c82a25adf251975f0817
Author: Kent Yao <[email protected]>
AuthorDate: Thu Mar 16 20:29:16 2023 -0700
[SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for
initialization
### What changes were proposed in this pull request?
Currently, we only support initializing spark-sql shell with a single-part
schema, which also must be forced to the session catalog.
#### case 1, specifying catalog field for v1sessioncatalog
```sql
bin/spark-sql --database spark_catalog.default
Exception in thread "main"
org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database
'spark_catalog.default' not found
```
#### case 2, setting the default catalog to another one
```sql
bin/spark-sql -c spark.sql.defaultCatalog=testcat -c
spark.sql.catalog.testcat=org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
-c spark.sql.catalog.testcat.url='jdbc:derby:memory:testcat;create=true' -c
spark.sql.catalog.testcat.driver=org.apache.derby.jdbc.AutoloadedDriver -c
spark.sql.catalogImplementation=in-memory --database SYS
23/03/16 18:40:49 WARN ObjectStore: Failed to get database sys, returning
NoSuchObjectException
Exception in thread "main"
org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'sys'
not found
```
In this PR, we switch to a `USE` statement to support multipart namespaces,
which helps us resolve
the catalog correctly.
### Why are the changes needed?
Make spark-sql shell better support the v2 catalog framework.
### Does this PR introduce _any_ user-facing change?
Yes, the `--database` option now supports multipart namespaces and works for v2
catalogs. You will also see this behavior on the Spark web UI.
### How was this patch tested?
New unit test added to `CliSuite`.
Closes #40457 from yaooqinn/SPARK-42823.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 2000d5f8db838db62967a45d574728a8bf2aaf6b)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 15 ++++++-------
.../spark/sql/hive/thriftserver/CliSuite.scala | 26 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 8 deletions(-)
diff --git
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 51b314ad2c1..22df4e67440 100644
---
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -201,14 +201,6 @@ private[hive] object SparkSQLCLIDriver extends Logging {
case e: UnsupportedEncodingException => exit(ERROR_PATH_NOT_FOUND)
}
- if (sessionState.database != null) {
- SparkSQLEnv.sqlContext.sessionState.catalog.setCurrentDatabase(
- s"${sessionState.database}")
- }
-
- // Execute -i init files (always in silent mode)
- cli.processInitFiles(sessionState)
-
// We don't propagate hive.metastore.warehouse.dir, because it might has
been adjusted in
// [[SharedState.loadHiveConfFile]] based on the user specified or default
values of
// spark.sql.warehouse.dir and hive.metastore.warehouse.dir.
@@ -216,6 +208,13 @@ private[hive] object SparkSQLCLIDriver extends Logging {
SparkSQLEnv.sqlContext.setConf(k, v)
}
+ if (sessionState.database != null) {
+ SparkSQLEnv.sqlContext.sql(s"USE ${sessionState.database}")
+ }
+
+ // Execute -i init files (always in silent mode)
+ cli.processInitFiles(sessionState)
+
cli.printMasterAndAppId
if (sessionState.execString != null) {
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 5413635ba47..651c6b7aafb 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.session.SessionState
import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkContext,
SparkFunSuite}
import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer
import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.HiveUtils._
import org.apache.spark.sql.hive.client.HiveClientImpl
@@ -806,4 +807,29 @@ class CliSuite extends SparkFunSuite {
prompt = "spark-sql (spark_42448)>")(
"select current_database();" -> "spark_42448")
}
+
+ test("SPARK-42823: multipart identifier support for specify database by
--database option") {
+ val catalogName = "testcat"
+ val catalogImpl =
s"spark.sql.catalog.$catalogName=${classOf[JDBCTableCatalog].getName}"
+ val catalogUrl =
+
s"spark.sql.catalog.$catalogName.url=jdbc:derby:memory:$catalogName;create=true"
+ val catalogDriver =
+
s"spark.sql.catalog.$catalogName.driver=org.apache.derby.jdbc.AutoloadedDriver"
+ val database = s"-database $catalogName.SYS"
+ val catalogConfigs =
+ Seq(catalogImpl, catalogDriver, catalogUrl,
"spark.sql.catalogImplementation=in-memory")
+ .flatMap(Seq("--conf", _))
+ runCliWithin(
+ 2.minute,
+ catalogConfigs ++ Seq("--database", s"$catalogName.SYS"))(
+ "SELECT CURRENT_CATALOG();" -> catalogName,
+ "SELECT CURRENT_SCHEMA();" -> "SYS")
+
+ runCliWithin(
+ 2.minute,
+ catalogConfigs ++
+ Seq("--conf", s"spark.sql.defaultCatalog=$catalogName", "--database",
"SYS"))(
+ "SELECT CURRENT_CATALOG();" -> catalogName,
+ "SELECT CURRENT_SCHEMA();" -> "SYS")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]