Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/20795#discussion_r175248943 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala --- @@ -1192,11 +1195,23 @@ class Analyzer( * @see https://issues.apache.org/jira/browse/SPARK-19737 */ object LookupFunctions extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions { - case f: UnresolvedFunction if !catalog.functionExists(f.name) => - withPosition(f) { - throw new NoSuchFunctionException(f.name.database.getOrElse("default"), f.name.funcName) - } + override def apply(plan: LogicalPlan): LogicalPlan = { + val catalogFunctionNameSet = new mutable.HashSet[FunctionIdentifier]() + plan.transformAllExpressions { + case f: UnresolvedFunction if catalogFunctionNameSet.contains(f.name) => f + case f: UnresolvedFunction if catalog.functionExists(f.name) => + catalogFunctionNameSet.add(normalizeFuncName(f.name)) + f + case f: UnresolvedFunction => + withPosition(f) { + throw new NoSuchFunctionException(f.name.database.getOrElse("default"), + f.name.funcName) + } + } + } + + private def normalizeFuncName(name: FunctionIdentifier): FunctionIdentifier = { + FunctionIdentifier(name.funcName.toLowerCase(Locale.ROOT), name.database) --- End diff -- @dilipbiswal @viirya Thanks for pointing this out. If we just use name.database, the cache will store "None" for the database name, so the 2nd function will not be resolved from the local cache. We need to use catalog.getCurrentDatabase for the database name in the cache. After running more test cases, I think it is better to cache the external function names only, not including the built-in functions. If we all agree on this approach, I can submit the code for review.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org