Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/20771#discussion_r173231610
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala ---
@@ -599,8 +610,71 @@ case class MapObjects private(
   override def children: Seq[Expression] = lambdaFunction :: inputData :: Nil
-  override def eval(input: InternalRow): Any =
-    throw new UnsupportedOperationException("Only code-generated evaluation is supported")
+  // The data with PythonUserDefinedType are actually stored with the data type of its sqlType.
+  // When we want to apply MapObjects on it, we have to use it.
+  lazy private val inputDataType = inputData.dataType match {
+    case p: PythonUserDefinedType => p.sqlType
+    case _ => inputData.dataType
+  }
+
+  private def executeFuncOnCollection(inputCollection: Seq[_]): Seq[_] = {
+    inputCollection.map { element =>
+      val row = InternalRow.fromSeq(Seq(element))
+      lambdaFunction.eval(row)
+    }
+  }
+
+  override def eval(input: InternalRow): Any = {
+    val inputCollection = inputData.eval(input)
+
+    if (inputCollection == null) {
+      return inputCollection
+    }
+
+    val results = inputDataType match {
--- End diff ---
We shouldn't be doing this during eval. Please move this into a function or a val.
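
For context, a minimal sketch of the kind of refactor being requested, in Scala, assuming a hypothetical `convertToSeq` lazy val inside the same class; it deliberately elides the remaining input types and the output-collection handling, so it is not the actual PR implementation:

```scala
// Hypothetical sketch (not the actual PR code): resolve the inputDataType
// match once, in a lazy val that holds a conversion function, so that
// eval() only applies the precomputed function on each call.
private lazy val convertToSeq: Any => Seq[_] = inputDataType match {
  case ObjectType(cls) if classOf[Seq[_]].isAssignableFrom(cls) =>
    _.asInstanceOf[Seq[_]]
  case ArrayType(elementType, _) =>
    _.asInstanceOf[ArrayData].toSeq[Any](elementType)
  // ... other supported input types elided in this sketch ...
}

override def eval(input: InternalRow): Any = {
  val inputCollection = inputData.eval(input)
  if (inputCollection == null) {
    return null
  }
  // The type dispatch already happened when convertToSeq was initialized;
  // each eval call only pays for the conversion and the lambda invocations.
  // (Building the requested output collection type is omitted here.)
  executeFuncOnCollection(convertToSeq(inputCollection))
}
```

The point of the suggestion is that the pattern match on the data type is the same for every row, so resolving it once keeps the per-row cost of interpreted MapObjects down to the conversion call and the lambda invocation.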