maropu commented on a change in pull request #22198: [SPARK-25121][SQL]
Supports multi-part table names for broadcast hint resolution
URL: https://github.com/apache/spark/pull/22198#discussion_r255823705
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
##########
@@ -47,20 +49,49 @@ object ResolveHints {
*
* This rule must happen before common table expressions.
*/
- class ResolveBroadcastHints(conf: SQLConf) extends Rule[LogicalPlan] {
+ class ResolveBroadcastHints(conf: SQLConf, catalog: SessionCatalog) extends
Rule[LogicalPlan] {
private val BROADCAST_HINT_NAMES = Set("BROADCAST", "BROADCASTJOIN",
"MAPJOIN")
def resolver: Resolver = conf.resolver
- private def applyBroadcastHint(plan: LogicalPlan, toBroadcast:
Set[String]): LogicalPlan = {
+ private def namePartsWithDatabase(nameParts: Seq[String], database:
String): Seq[String] = {
+ if (nameParts.size == 1) {
+ database +: nameParts
+ } else {
+ nameParts
+ }
+ }
+
+ private def matchedTableIdentifier(
+ nameParts: Seq[String],
+ tableIdent: IdentifierWithDatabase): Boolean = {
+ tableIdent.database match {
+ case Some(db) if resolver(catalog.globalTempViewManager.database, db)
=>
+ val identifierList = db :: tableIdent.identifier :: Nil
+ namePartsWithDatabase(nameParts,
catalog.globalTempViewManager.database)
+ .corresponds(identifierList)(resolver)
+ case None if catalog.getTempView(tableIdent.identifier).isDefined =>
+ nameParts.size == 1 && resolver(nameParts.head,
tableIdent.identifier)
+ case _ =>
+ val db = tableIdent.database.getOrElse(catalog.getCurrentDatabase)
Review comment:
Sorry, but I forgot the previous discussion:
https://github.com/apache/spark/pull/22198#issuecomment-416667343
To avoid the complexity, we had better keep the current behaviour: it
just ignores a database name in multi-part names. So, in the example you
described, we assume Spark simply applies the hint to both. Thoughts?
I haven't updated this PR yet because I was just waiting for other
developers' comments.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]