Github user dilipbiswal commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20795#discussion_r202128129
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 ---
    @@ -1204,16 +1207,32 @@ class Analyzer(
        * only performs simple existence check according to the function 
identifier to quickly identify
        * undefined functions without triggering relation resolution, which may 
incur potentially
        * expensive partition/schema discovery process in some cases.
    -   *
    +   * In order to avoid duplicate lookups of external functions, the external
 function identifier will
    +   * be stored in the local hash set externalFunctionNameSet.
        * @see [[ResolveFunctions]]
        * @see https://issues.apache.org/jira/browse/SPARK-19737
        */
       object LookupFunctions extends Rule[LogicalPlan] {
    -    override def apply(plan: LogicalPlan): LogicalPlan = 
plan.transformAllExpressions {
    -      case f: UnresolvedFunction if !catalog.functionExists(f.name) =>
    -        withPosition(f) {
    -          throw new 
NoSuchFunctionException(f.name.database.getOrElse("default"), f.name.funcName)
    -        }
    +    override def apply(plan: LogicalPlan): LogicalPlan = {
    +      val externalFunctionNameSet = new 
mutable.HashSet[FunctionIdentifier]()
    +      plan.transformAllExpressions {
    +        case f: UnresolvedFunction
    +          if externalFunctionNameSet.contains(normalizeFuncName(f.name)) 
=> f
    +        case f: UnresolvedFunction if catalog.isRegisteredFunction(f.name) 
=> f
    +        case f: UnresolvedFunction if catalog.isPersistentFunction(f.name) 
=>
    +          externalFunctionNameSet.add(normalizeFuncName(f.name))
    +          f
    +        case f: UnresolvedFunction =>
    +          withPosition(f) {
    +            throw new 
NoSuchFunctionException(f.name.database.getOrElse(catalog.getCurrentDatabase),
    +              f.name.funcName)
    +          }
    +      }
    +    }
    +
    +    def normalizeFuncName(name: FunctionIdentifier): FunctionIdentifier = {
    +      FunctionIdentifier(name.funcName.toLowerCase(Locale.ROOT),
    +        name.database.orElse(Some(catalog.getCurrentDatabase)))
    --- End diff --
    
    @kevinyu98 how about taking `conf.caseSensitiveAnalysis` into consideration here?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to