Github user liancheng commented on a diff in the pull request:
https://github.com/apache/spark/pull/3640#discussion_r21512352
--- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
---
@@ -54,47 +54,95 @@ private[hive] abstract class HiveFunctionRegistry
val functionClassName = functionInfo.getFunctionClass.getName
if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) {
- HiveSimpleUdf(functionClassName, children)
+ HiveSimpleUdf(new HiveFunctionCache(functionClassName), children)
} else if
(classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) {
- HiveGenericUdf(functionClassName, children)
+ HiveGenericUdf(new HiveFunctionCache(functionClassName), children)
} else if (
classOf[AbstractGenericUDAFResolver].isAssignableFrom(functionInfo.getFunctionClass))
{
- HiveGenericUdaf(functionClassName, children)
+ HiveGenericUdaf(new HiveFunctionCache(functionClassName), children)
} else if
(classOf[UDAF].isAssignableFrom(functionInfo.getFunctionClass)) {
- HiveUdaf(functionClassName, children)
+ HiveUdaf(new HiveFunctionCache(functionClassName), children)
} else if
(classOf[GenericUDTF].isAssignableFrom(functionInfo.getFunctionClass)) {
- HiveGenericUdtf(functionClassName, Nil, children)
+ HiveGenericUdtf(new HiveFunctionCache(functionClassName), Nil,
children)
} else {
sys.error(s"No handler for udf ${functionInfo.getFunctionClass}")
}
}
}
-private[hive] trait HiveFunctionFactory {
- val functionClassName: String
-
- def createFunction[UDFType]() =
-
getContextOrSparkClassLoader.loadClass(functionClassName).newInstance.asInstanceOf[UDFType]
-}
-
-private[hive] abstract class HiveUdf extends Expression with Logging with
HiveFunctionFactory {
- self: Product =>
+/**
+ * This class provides the UDF creation and also the UDF instance
serialization and
+ * de-serialization cross process boundary.
+ *
+ * We use class instead of trait, seems property variables of trait cannot
be serialized when
+ * bundled with Case Class; in the other hand, we need to intercept the
UDF instance ser/de.
+ * the "Has-a" probably better than "Is-a".
+ * @param functionClassName UDF class name
+ */
+class HiveFunctionCache(var functionClassName: String) extends
java.io.Externalizable {
+ // for Serialization
+ def this() = this(null)
+
+ private var instance: Any = null
+
+ def writeExternal(out: java.io.ObjectOutput) {
+ // output the function name
+ out.writeUTF(functionClassName)
+
+ // Write a flag if instance is null or not
+ out.writeBoolean(instance != null)
--- End diff --
Does this relate to the `HiveSimpleUdf` and UDF bridge case you mentioned
offline? It would be better to leave a comment here, as I was confused when
reading this.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]