GitHub user yhuai commented on a diff in the pull request:
https://github.com/apache/spark/pull/12051#discussion_r57826391
--- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala ---
@@ -65,55 +67,97 @@ private[hive] class HiveFunctionRegistry(
       val functionClassName = functionInfo.getFunctionClass.getName
-      // When we instantiate hive UDF wrapper class, we may throw exception if the input expressions
-      // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we
-      // catch the exception and throw AnalysisException instead.
+      // Make an expression of the appropriate type based on the function class.
       try {
         if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) {
           val udf = HiveGenericUDF(
             name, new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF), children)
           udf.dataType // Force it to check input data types.
           udf
-        } else if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) {
-          val udf = HiveSimpleUDF(name, new HiveFunctionWrapper(functionClassName), children)
+        } else {
+          makeFunctionBuilder(name, functionInfo.getFunctionClass)(children)
+        }
+      } catch {
+        case ae: AnalysisException =>
+          throw ae
+        case NonFatal(e) =>
+          // TODO: don't swallow stack trace here
+          throw new AnalysisException(
+            s"No handler for Hive UDF '${functionInfo.getFunctionClass.getCanonicalName}': $e")
+      }
+    }
+  }
+
+  /**
+   * Construct a [[FunctionBuilder]] based on the provided class that represents a function.
+   *
+   * This performs reflection to decide what type of [[Expression]] to return in the builder.
+   * This is useful for creating temporary functions.
+   */
+  override def makeFunctionBuilder(funcName: String, funcClassName: String): FunctionBuilder = {
+    makeFunctionBuilder(funcName, Utils.classForName(funcClassName))
+  }
+
+  /**
+   * Construct a [[FunctionBuilder]] based on the provided class that represents a function.
+   */
+  private def makeFunctionBuilder(name: String, clazz: Class[_]): FunctionBuilder = {
+    // When we instantiate hive UDF wrapper class, we may throw exception if the input
+    // expressions don't satisfy the hive UDF, such as type mismatch, input number
+    // mismatch, etc. Here we catch the exception and throw AnalysisException instead.
+    try {
+      if (classOf[UDF].isAssignableFrom(clazz)) {
+        (children: Seq[Expression]) => {
+          val udf = HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), children)
           udf.dataType // Force it to check input data types.
           udf
-        } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) {
-          val udf = HiveGenericUDF(name, new HiveFunctionWrapper(functionClassName), children)
+        }
+      } else if (classOf[GenericUDF].isAssignableFrom(clazz)) {
+        (children: Seq[Expression]) => {
+          val udf = HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), children)
           udf.dataType // Force it to check input data types.
           udf
-        } else if (
-          classOf[AbstractGenericUDAFResolver].isAssignableFrom(functionInfo.getFunctionClass)) {
-          val udaf = HiveUDAFFunction(name, new HiveFunctionWrapper(functionClassName), children)
+        }
+      } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) {
+        (children: Seq[Expression]) => {
+          val udaf = HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), children)
           udaf.dataType // Force it to check input data types.
           udaf
-        } else if (classOf[UDAF].isAssignableFrom(functionInfo.getFunctionClass)) {
+        }
+      } else if (classOf[UDAF].isAssignableFrom(clazz)) {
+        (children: Seq[Expression]) => {
           val udaf = HiveUDAFFunction(
-            name, new HiveFunctionWrapper(functionClassName), children, isUDAFBridgeRequired = true)
+            name,
+            new HiveFunctionWrapper(clazz.getName),
+            children,
+            isUDAFBridgeRequired = true)
           udaf.dataType // Force it to check input data types.
           udaf
-        } else if (classOf[GenericUDTF].isAssignableFrom(functionInfo.getFunctionClass)) {
-          val udtf = HiveGenericUDTF(name, new HiveFunctionWrapper(functionClassName), children)
+        }
+      } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) {
+        (children: Seq[Expression]) => {
+          val udtf = HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), children)
           udtf.elementTypes // Force it to check input data types.
           udtf
-        } else {
-          throw new AnalysisException(s"No handler for udf ${functionInfo.getFunctionClass}")
         }
-      } catch {
-        case analysisException: AnalysisException =>
-          // If the exception is an AnalysisException, just throw it.
-          throw analysisException
-        case throwable: Throwable =>
-          // If there is any other error, we throw an AnalysisException.
-          val errorMessage = s"No handler for Hive udf ${functionInfo.getFunctionClass} " +
-            s"because: ${throwable.getMessage}."
-          throw new AnalysisException(errorMessage)
+      } else {
+        throw new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}'")
      }
+    } catch {
+      case ae: AnalysisException =>
+        throw ae
+      case NonFatal(e) =>
+        // TODO: don't swallow stack trace here
+        throw new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}': $e")
--- End diff --
I guess we can explicitly set the stack trace here instead of swallowing it?
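
For example, something like this (just a rough sketch against the `NonFatal` branch above, reusing `clazz` from `makeFunctionBuilder`; not tested). `getStackTrace`/`setStackTrace` are plain `java.lang.Throwable` API, so the original frames can be carried over without depending on whether `AnalysisException` accepts a `cause`:

    } catch {
      case ae: AnalysisException =>
        throw ae
      case NonFatal(e) =>
        // Same message as before, but copy the original stack trace onto the
        // wrapping exception so the root cause is not lost.
        val wrapped =
          new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}': $e")
        wrapped.setStackTrace(e.getStackTrace)
        throw wrapped
    }

If `AnalysisException` grows a `cause` parameter at some point, passing `e` through directly would be even cleaner; either way the "don't swallow stack trace" TODO can go away.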