This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 89fe835b9 [KYUUBI #4336] Avoid listing all schemas for Spark session catalog on schema pruning
89fe835b9 is described below
commit 89fe835b93ea26d1c2ce5d9991a284449d16caa2
Author: Cheng Pan <[email protected]>
AuthorDate: Thu Feb 16 15:05:53 2023 +0800
[KYUUBI #4336] Avoid listing all schemas for Spark session catalog on schema pruning
### _Why are the changes needed?_
Some DBMS tools, like DBeaver and HUE, call the Thrift metadata API to list catalogs, databases, and tables. The current implementation of `CatalogShim_v3_0#getSchemas` calls `listAllNamespaces` first and then performs schema pruning on the Spark driver, which may cause a "permission denied" exception when HMS has permission control enabled, e.g. through the Ranger plugin.
This PR proposes to call the HMS API directly (through the v1 session catalog) for `spark_catalog`, to avoid the issue shown below.
```
2023-02-15 20:02:13.048 ERROR org.apache.kyuubi.server.KyuubiTBinaryFrontendService: Error getting schemas:
org.apache.kyuubi.KyuubiSQLException: Error operating GetSchemas: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Permission denied: user [user1] does not have [SELECT] privilege on [userdb1])
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:134)
    at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:249)
    at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.databaseExists(ExternalCatalogWithListener.scala:69)
    at org.apache.spark.sql.catalyst.catalog.SessionCatalog.databaseExists(SessionCatalog.scala:294)
    at org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog.listNamespaces(V2SessionCatalog.scala:212)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.$anonfun$listAllNamespaces$1(CatalogShim_v3_0.scala:74)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.$anonfun$listAllNamespaces$1$adapted(CatalogShim_v3_0.scala:73)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:293)
    at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
    at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:293)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:290)
    at scala.collection.mutable.ArrayOps$ofRef.flatMap(ArrayOps.scala:198)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.listAllNamespaces(CatalogShim_v3_0.scala:73)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.listAllNamespaces(CatalogShim_v3_0.scala:90)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.getSchemasWithPattern(CatalogShim_v3_0.scala:118)
    at org.apache.kyuubi.engine.spark.shim.CatalogShim_v3_0.getSchemas(CatalogShim_v3_0.scala:133)
    at org.apache.kyuubi.engine.spark.operation.GetSchemas.runInternal(GetSchemas.scala:43)
    at org.apache.kyuubi.operation.AbstractOperation.run(AbstractOperation.scala:164)
    at org.apache.kyuubi.session.AbstractSession.runOperation(AbstractSession.scala:99)
    at org.apache.kyuubi.engine.spark.session.SparkSessionImpl.runOperation(SparkSessionImpl.scala:78)
    at org.apache.kyuubi.session.AbstractSession.getSchemas(AbstractSession.scala:150)
    at org.apache.kyuubi.service.AbstractBackendService.getSchemas(AbstractBackendService.scala:83)
    at org.apache.kyuubi.service.TFrontendService.GetSchemas(TFrontendService.scala:294)
    at org.apache.kyuubi.shade.org.apache.hive.service.rpc.thrift.TCLIService$Processor$GetSchemas.getResult(TCLIService.java:1617)
    at org.apache.kyuubi.shade.org.apache.hive.service.rpc.thrift.TCLIService$Processor$GetSchemas.getResult(TCLIService.java:1602)
    at org.apache.kyuubi.shade.org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
    at org.apache.kyuubi.shade.org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
    at org.apache.kyuubi.service.authentication.TSetIpAddressProcessor.process(TSetIpAddressProcessor.scala:36)
    at org.apache.kyuubi.shade.org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)
```
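For illustration, a minimal sketch of the strategy the fix adopts for `spark_catalog`: instead of enumerating every namespace through the v2 catalog and pruning on the driver, the schema pattern is handed to the v1 session catalog, which forwards it to the metastore. The helper name below is hypothetical, and `schemaPattern` is assumed to be a SQL LIKE-style pattern such as `userdb%`.
```scala
import org.apache.spark.sql.SparkSession

// Hypothetical helper, not the actual shim code: delegate pattern matching to
// the v1 session catalog, which forwards it to the Hive Metastore, so only
// matching (and authorized) databases come back, rather than listing all
// namespaces and filtering them on the Spark driver.
def listSessionCatalogSchemas(spark: SparkSession, schemaPattern: String): Seq[String] = {
  spark.sessionState.catalog.listDatabases(schemaPattern)
}
```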
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly, including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #4336 from pan3793/list-schemas.
Closes #4336
9ece864c [Cheng Pan] fix
f71587e9 [Cheng Pan] Avoid listing all schemas for Spark session catalog on schema pruning
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
---
.../org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala | 2 +-
.../org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala | 9 ++++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala
index 3478abc66..0f6195acf 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v2_4.scala
@@ -41,7 +41,7 @@ class CatalogShim_v2_4 extends SparkCatalogShim {
       catalogName: String,
       schemaPattern: String): Seq[Row] = {
     (spark.sessionState.catalog.listDatabases(schemaPattern) ++
-      getGlobalTempViewManager(spark, schemaPattern)).map(Row(_, ""))
+      getGlobalTempViewManager(spark, schemaPattern)).map(Row(_, SparkCatalogShim.SESSION_CATALOG))
   }
 
   def setCurrentDatabase(spark: SparkSession, databaseName: String): Unit = {
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala
index 50e641b59..a663ba636 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/shim/CatalogShim_v3_0.scala
@@ -129,13 +129,12 @@ class CatalogShim_v3_0 extends CatalogShim_v2_4 {
       spark: SparkSession,
       catalogName: String,
       schemaPattern: String): Seq[Row] = {
-    val catalog = getCatalog(spark, catalogName)
-    var schemas = getSchemasWithPattern(catalog, schemaPattern)
     if (catalogName == SparkCatalogShim.SESSION_CATALOG) {
-      val viewMgr = getGlobalTempViewManager(spark, schemaPattern)
-      schemas = schemas ++ viewMgr
+      super.getSchemas(spark, catalogName, schemaPattern)
+    } else {
+      val catalog = getCatalog(spark, catalogName)
+      getSchemasWithPattern(catalog, schemaPattern).map(Row(_, catalog.name))
     }
-    schemas.map(Row(_, catalog.name))
   }
 
   override def setCurrentDatabase(spark: SparkSession, databaseName: String): Unit = {
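For reference, a sketch of how a client such as DBeaver reaches the code path this patch changes: JDBC metadata calls map to the Thrift `GetSchemas` RPC served by the Spark engine. The host, port, user, and pattern below are hypothetical placeholders, and the snippet assumes a Hive-compatible JDBC driver is on the classpath.
```scala
import java.sql.DriverManager

// Hypothetical connection details: adjust host/port/user for your deployment.
object GetSchemasExample extends App {
  val conn = DriverManager.getConnection("jdbc:hive2://localhost:10009/", "user1", "")
  try {
    // DatabaseMetaData.getSchemas maps to the Thrift GetSchemas RPC, which the
    // engine now serves without listing every namespace up front.
    val rs = conn.getMetaData.getSchemas("spark_catalog", "userdb%")
    while (rs.next()) {
      println(s"${rs.getString("TABLE_SCHEM")} (${rs.getString("TABLE_CATALOG")})")
    }
    rs.close()
  } finally {
    conn.close()
  }
}
```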