Github user kevinyu98 commented on a diff in the pull request:
https://github.com/apache/spark/pull/20795#discussion_r175248943
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---
@@ -1192,11 +1195,23 @@ class Analyzer(
    * @see https://issues.apache.org/jira/browse/SPARK-19737
    */
   object LookupFunctions extends Rule[LogicalPlan] {
-    override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions {
-      case f: UnresolvedFunction if !catalog.functionExists(f.name) =>
-        withPosition(f) {
-          throw new NoSuchFunctionException(f.name.database.getOrElse("default"), f.name.funcName)
-        }
+    override def apply(plan: LogicalPlan): LogicalPlan = {
+      val catalogFunctionNameSet = new mutable.HashSet[FunctionIdentifier]()
+      plan.transformAllExpressions {
+        case f: UnresolvedFunction if catalogFunctionNameSet.contains(f.name) => f
+        case f: UnresolvedFunction if catalog.functionExists(f.name) =>
+          catalogFunctionNameSet.add(normalizeFuncName(f.name))
+          f
+        case f: UnresolvedFunction =>
+          withPosition(f) {
+            throw new NoSuchFunctionException(f.name.database.getOrElse("default"),
+              f.name.funcName)
+          }
+      }
+    }
+
+    private def normalizeFuncName(name: FunctionIdentifier): FunctionIdentifier = {
+      FunctionIdentifier(name.funcName.toLowerCase(Locale.ROOT), name.database)
--- End diff --
@dilipbiswal @viirya Thanks for pointing this out. If we just use
name.database, the cache will store None as the database name, and the second
occurrence of the function will not be resolved from the local cache. We need
to use catalog.getCurrentDatabase for the database name in the cache.
After running more test cases, I think it is better to cache only the external
function names and not include the built-in functions. If we all agree with
this approach, I can submit the code for review.
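
For concreteness, here is a minimal sketch of the normalization I have in
mind. It is a standalone illustration, not the exact patch: the currentDb
parameter is an assumption that stands in for catalog.getCurrentDatabase,
which the real rule would call inside the analyzer.

    import java.util.Locale

    import org.apache.spark.sql.catalyst.FunctionIdentifier

    // Sketch only: qualify the identifier with the current database when none
    // is given, so the cached key is always fully qualified and a later
    // unqualified reference to the same function can still hit the cache.
    // `currentDb` stands in for catalog.getCurrentDatabase in the real rule.
    def normalizeFuncName(name: FunctionIdentifier, currentDb: String): FunctionIdentifier = {
      FunctionIdentifier(
        name.funcName.toLowerCase(Locale.ROOT),
        Some(name.database.getOrElse(currentDb)))
    }

With this, both db1.foo and a bare foo (while db1 is the current database)
normalize to the same cache key, which is what the cache-hit case in apply
relies on.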