juliuszsompolski commented on code in PR #42069:
URL: https://github.com/apache/spark/pull/42069#discussion_r1278301295
##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala:
##########
@@ -1504,15 +1506,24 @@ class SparkConnectPlanner(val sessionHolder:
SessionHolder) extends Logging {
}
private def unpackUdf(fun: proto.CommonInlineUserDefinedFunction): UdfPacket
= {
- Utils.deserialize[UdfPacket](
- fun.getScalarScalaUdf.getPayload.toByteArray,
- Utils.getContextOrSparkClassLoader)
+ unpackScalarScalaUDF[UdfPacket](fun.getScalarScalaUdf)
}
private def unpackForeachWriter(fun: proto.ScalarScalaUDF):
ForeachWriterPacket = {
- Utils.deserialize[ForeachWriterPacket](
- fun.getPayload.toByteArray,
- Utils.getContextOrSparkClassLoader)
+ unpackScalarScalaUDF[ForeachWriterPacket](fun)
+ }
+
+ private def unpackScalarScalaUDF[T](fun: proto.ScalarScalaUDF): T = {
+ try {
+ logDebug(s"Unpack using class loader:
${Utils.getContextOrSparkClassLoader}")
+ Utils.deserialize[T](fun.getPayload.toByteArray,
Utils.getContextOrSparkClassLoader)
+ } catch {
+ case e: IOException if e.getCause.isInstanceOf[NoSuchMethodException] =>
+ throw new ClassNotFoundException(
+ s"Failed to load class correctly due to ${e.getCause}. " +
+ "Make sure the artifact where the class is defined is installed by
calling" +
+ " session.addArtifact.")
Review Comment:
In the description you write:
> If the user code is actually needed to execute the UDF, we will return an
error message to suggest the user to add the missing classes using the
addArtifact method.
However, since this check runs during deserialization, wouldn't it also
trigger for a class that is not actually used, but was just accidentally pulled
in and is not covered by the CONNECT_SCALA_UDF_STUB_CLASSES config?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]