Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/16233#discussion_r94041766
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
---
@@ -510,32 +510,121 @@ class Analyzer(
* Replaces [[UnresolvedRelation]]s with concrete relations from the
catalog.
*/
object ResolveRelations extends Rule[LogicalPlan] {
- private def lookupTableFromCatalog(u: UnresolvedRelation): LogicalPlan
= {
+
+ // If the unresolved relation is running directly on files, we just
return the original
+ // UnresolvedRelation, the plan will get resolved later. Else we look
up the table from catalog
+ // and change the default database name if it is a view.
+ // We usually look up a table from the default database if the table
identifier has an empty
+ // database part, for a view the default database should be the
currentDb when the view was
+ // created. When the case comes to resolving a nested view, the view
may have different default
+ // database with that the referenced view has, so we need to use the
variable `defaultDatabase`
+ // to track the current default database.
+ // When the relation we resolve is a view, we fetch the
view.desc(which is a CatalogTable), and
+ // then set the value of `CatalogTable.viewDefaultDatabase` to the
variable `defaultDatabase`,
+ // we look up the relations that the view references using the default
database.
+ // For example:
+ // |- view1 (defaultDatabase = db1)
+ // |- operator
+ // |- table2 (defaultDatabase = db1)
+ // |- view2 (defaultDatabase = db2)
+ // |- view3 (defaultDatabase = db3)
+ // |- view4 (defaultDatabase = db4)
+ // In this case, the view `view1` is a nested view, it directly
references `table2`, `view2`
+ // and `view4`, the view `view2` references `view3`. On resolving the
table, we look up the
+ // relations `table2`, `view2`, `view4` using the default database
`db1`, and look up the
+ // relation `view3` using the default database `db2`.
+ //
+ // Note this is compatible with the views defined by older versions of
Spark(before 2.2), which
+ // have empty defaultDatabase and all the relations in viewText have
database part defined.
+ def resolveRelation(
+ plan: LogicalPlan,
+ defaultDatabase: Option[String] = None): LogicalPlan = plan match {
+ case u @ UnresolvedRelation(table: TableIdentifier, _) if
isRunningDirectlyOnFiles(table) =>
+ u
+ case u: UnresolvedRelation =>
+ resolveView(lookupTableFromCatalog(u, defaultDatabase))
+ }
+
+ def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
+ case i @ InsertIntoTable(u: UnresolvedRelation, parts, child, _, _)
if child.resolved =>
+ i.copy(table = EliminateSubqueryAliases(lookupTableFromCatalog(u)))
+ case u: UnresolvedRelation => resolveRelation(u)
+ }
+
+ // Look up the table with the given name from catalog. The database we
look up the table from
+ // is decided follow the steps:
+ // 1. If the database part is defined in the table identifier, use
that database name;
+ // 2. Else If the defaultDatabase is defined, use the default database
name;
+ // 3. Else use the currentDb of the SessionCatalog.
+ private def lookupTableFromCatalog(
+ u: UnresolvedRelation,
+ defaultDatabase: Option[String] = None): LogicalPlan = {
try {
- catalog.lookupRelation(u.tableIdentifier, u.alias)
+ catalog.lookupRelation(u.tableIdentifier, u.alias, defaultDatabase)
} catch {
case _: NoSuchTableException =>
u.failAnalysis(s"Table or view not found: ${u.tableName}")
}
}
- def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
- case i @ InsertIntoTable(u: UnresolvedRelation, parts, child, _, _)
if child.resolved =>
- i.copy(table = EliminateSubqueryAliases(lookupTableFromCatalog(u)))
- case u: UnresolvedRelation =>
- val table = u.tableIdentifier
- if (table.database.isDefined && conf.runSQLonFile &&
!catalog.isTemporaryTable(table) &&
- (!catalog.databaseExists(table.database.get) ||
!catalog.tableExists(table))) {
- // If the database part is specified, and we support running SQL
directly on files, and
- // it's not a temporary view, and the table does not exist, then
let's just return the
- // original UnresolvedRelation. It is possible we are matching a
query like "select *
- // from parquet.`/path/to/query`". The plan will get resolved
later.
- // Note that we are testing (!db_exists || !table_exists)
because the catalog throws
- // an exception from tableExists if the database does not exist.
- u
- } else {
- lookupTableFromCatalog(u)
+ // If the database part is specified, and we support running SQL
directly on files, and
+ // it's not a temporary view, and the table does not exist, then let's
just return the
+ // original UnresolvedRelation. It is possible we are matching a query
like "select *
+ // from parquet.`/path/to/query`". The plan will get resolved later.
+ // Note that we are testing (!db_exists || !table_exists) because the
catalog throws
+ // an exception from tableExists if the database does not exist.
+ private def isRunningDirectlyOnFiles(table: TableIdentifier): Boolean
= {
+ table.database.isDefined && conf.runSQLonFile &&
!catalog.isTemporaryTable(table) &&
+ (!catalog.databaseExists(table.database.get) ||
!catalog.tableExists(table))
+ }
+
+ // Change the default database name if the plan is a view, and
transformDown with the new
+ // database name to resolve all UnresolvedRelations and Views.
+ def resolveView(plan: LogicalPlan): LogicalPlan = plan match {
+ case view: View =>
+ val desc = view.desc
+ val defaultDatabase = desc.viewDefaultDatabase
+ // If a view is a datasource table(the table provider is defined
and is not hive), the
--- End diff --
When does this happen?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]