This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new d1c6a00 [SPARK-32624][SQL][FOLLOWUP] Fix regression in
CodegenContext.addReferenceObj on nested Scala types
d1c6a00 is described below
commit d1c6a0021b9fe4c29a743e3504bcb0e59dfbbe45
Author: Kris Mok <[email protected]>
AuthorDate: Tue Sep 1 15:15:11 2020 +0900
[SPARK-32624][SQL][FOLLOWUP] Fix regression in
CodegenContext.addReferenceObj on nested Scala types
### What changes were proposed in this pull request?
Use `CodeGenerator.typeName()` instead of `Class.getCanonicalName()` in
`CodegenContext.addReferenceObj()` for getting the runtime class name for an
object.
### Why are the changes needed?
https://github.com/apache/spark/pull/29439 fixed a bug in
`CodegenContext.addReferenceObj()` for `Array[Byte]` (i.e. Spark SQL's
`BinaryType`) objects, but unfortunately it introduced a regression for some
nested Scala types.
For example, for `implicitly[Ordering[UTF8String]]`, after that PR
`CodegenContext.addReferenceObj()` would return `((null) references[0] /* ...
*/)`. The actual type for `implicitly[Ordering[UTF8String]]` is
`scala.math.LowPriorityOrderingImplicits$$anon$3` in Scala 2.12.10, and
`Class.getCanonicalName()` returns `null` for that class.
On the other hand, `Class.getName()` is safe to use for all non-array
types, and Janino will happily accept the type name returned from
`Class.getName()` for nested types. `CodeGenerator.typeName()` happens to do
the right thing by correctly handling arrays and otherwise use
`Class.getName()`. So it's a better alternative than `Class.getCanonicalName()`.
Side note: rule of thumb for using Java reflection in Spark: it may be
tempting to use `Class.getCanonicalName()`, but for functions that may need to
handle Scala types, please avoid it due to potential issues with nested Scala
types.
Instead, use `Class.getName()` or utility functions in
`org.apache.spark.util.Utils` (e.g. `Utils.getSimpleName()` or
`Utils.getFormattedClassName()` etc).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added new unit test case for the regression case in `CodeGenerationSuite`.
Closes #29602 from rednaxelafx/spark-32624-followup.
Authored-by: Kris Mok <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit 6e5bc39e17d4cf02806761170de6ddeb634aa343)
Signed-off-by: HyukjinKwon <[email protected]>
---
.../expressions/codegen/CodeGenerator.scala | 2 +-
.../catalyst/expressions/CodeGenerationSuite.scala | 21 ++++++++++++++++++---
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index c8aa83e..aa6d6e6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -133,7 +133,7 @@ class CodegenContext extends Logging {
def addReferenceObj(objName: String, obj: Any, className: String = null):
String = {
val idx = references.length
references += obj
- val clsName = Option(className).getOrElse(obj.getClass.getCanonicalName)
+ val clsName =
Option(className).getOrElse(CodeGenerator.typeName(obj.getClass))
s"(($clsName) references[$idx] /* $objName */)"
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 09cc01d..f1de63a 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.expressions
import java.sql.Timestamp
+import scala.math.Ordering
+
import org.apache.spark.SparkFunSuite
import org.apache.spark.metrics.source.CodegenMetrics
import org.apache.spark.sql.Row
@@ -537,11 +539,24 @@ class CodeGenerationSuite extends SparkFunSuite with
ExpressionEvalHelper {
GenerateMutableProjection.generate(exprs, true)
}
- test("SPARK-32624: Use getCanonicalName to fix byte[] compile issue") {
+ test("SPARK-32624: Use CodeGenerator.typeName() to fix byte[] compile
issue") {
val ctx = new CodegenContext
val bytes = new Array[Byte](3)
- val byteObj = ctx.addReferenceObj("bytes", bytes)
- assert(byteObj == "((byte[]) references[0] /* bytes */)")
+ val refTerm = ctx.addReferenceObj("bytes", bytes)
+ assert(refTerm == "((byte[]) references[0] /* bytes */)")
+ }
+
+ test("SPARK-32624: CodegenContext.addReferenceObj should work for nested
Scala class") {
+ // emulate TypeUtils.getInterpretedOrdering(StringType)
+ val ctx = new CodegenContext
+ val comparator = implicitly[Ordering[UTF8String]]
+ val refTerm = ctx.addReferenceObj("comparator", comparator)
+
+ // Expecting result:
+ // "((scala.math.LowPriorityOrderingImplicits$$anon$3) references[0] /*
comparator */)"
+ // Using lenient assertions to be resilient to annonymous class numbering
changes
+ assert(!refTerm.contains("null"))
+ assert(refTerm.contains("scala.math.LowPriorityOrderingImplicits$$anon$"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]