This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new e7587df  [SPARK-31190][SQL] ScalaReflection should not erasure user 
defined AnyVal type
e7587df is described below

commit e7587df9478f96d5a949afda26fb4a6bf8636d6f
Author: yi.wu <[email protected]>
AuthorDate: Mon Mar 23 16:28:34 2020 +0800

    [SPARK-31190][SQL] ScalaReflection should not erasure user defined AnyVal 
type
    
    ### What changes were proposed in this pull request?
    
    Improve `ScalaReflection` to only don't erasure non user defined `AnyVal` 
type, but still erasure other types, e.g. `Any`. And this brings two benefits:
    
    1. Give better encode error message for some unsupported types, e.g. `Any`
    
    2. Won't miss the walk path for the `AnyVal` type
    
    ### Why are the changes needed?
    
    Firstly, PR #15284 added encode(serializeFor/deserializeFor) support for 
value class, which extends `AnyVal`, by not erasure types. But, this also 
introduce a problem that when user try to encoder unsupported types, e.g. 
`Any`, it will fail on `java.lang.ClassNotFoundException: scala.Any` due to the 
reason that `scala.Any` doesn't erasure to `java.lang.Object`.
    
    Also, in current `getClassNameFromType()`, it always erasure types which 
could missing walked path for user defined `AnyVal` types.
    
    ### Does this PR introduce any user-facing change?
    
    Yes. For the test below:
    
    ```
    case class Bar(i: Any)
    case class Foo(i: Bar) extends AnyVal
    
    test() {
      implicitly[ExpressionEncoder[Foo]]
    }
    ```
    
    Before:
    
    ```
    java.lang.ClassNotFoundException: scala.Any
     at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
     at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
     at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
     ...
    ````
    
    After:
    ```
    java.lang.UnsupportedOperationException: No Encoder found for Any
     - field (class: "java.lang.Object", name: "i")
     - field (class: "org.apache.spark.sql.catalyst.encoders.Bar", name: "i")
     - root class: "org.apache.spark.sql.catalyst.encoders.Foo"
     at 
org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerFor$1(ScalaReflection.scala:561)
    ```
    
    ### How was this patch tested?
    
    Added unit test and test manually.
    
    Closes #27959 from Ngone51/impr_anyval.
    
    Authored-by: yi.wu <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit 5c4d44bb83aafc296265524ec1ee09bcd11365ae)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/ScalaReflection.scala       | 45 ++++++++++++++--------
 .../catalyst/encoders/ExpressionEncoderSuite.scala |  9 +++++
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 1f7634b..f587245 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -611,10 +611,39 @@ object ScalaReflection extends ScalaReflection {
     }
   }
 
+  private def erasure(tpe: Type): Type = {
+    // For user-defined AnyVal classes, we should not erasure it. Otherwise, 
it will
+    // resolve to underlying type which wrapped by this class, e.g erasure
+    // `case class Foo(i: Int) extends AnyVal` will return type `Int` instead 
of `Foo`.
+    // But, for other types, we do need to erasure it. For example, we need to 
erasure
+    // `scala.Any` to `java.lang.Object` in order to load it from Java 
ClassLoader.
+    // Please see SPARK-17368 & SPARK-31190 for more details.
+    if (isSubtype(tpe, localTypeOf[AnyVal]) && 
!tpe.toString.startsWith("scala")) {
+      tpe
+    } else {
+      tpe.erasure
+    }
+  }
+
+  /**
+   * Returns the full class name for a type. The returned name is the canonical
+   * Scala name, where each component is separated by a period. It is NOT the
+   * Java-equivalent runtime name (no dollar signs).
+   *
+   * In simple cases, both the Scala and Java names are the same, however when 
Scala
+   * generates constructs that do not map to a Java equivalent, such as 
singleton objects
+   * or nested classes in package objects, it uses the dollar sign ($) to 
create
+   * synthetic classes, emulating behaviour in Java bytecode.
+   */
+  def getClassNameFromType(tpe: `Type`): String = {
+    erasure(tpe).dealias.typeSymbol.asClass.fullName
+  }
+
   /*
    * Retrieves the runtime class corresponding to the provided type.
    */
-  def getClassFromType(tpe: Type): Class[_] = 
mirror.runtimeClass(tpe.dealias.typeSymbol.asClass)
+  def getClassFromType(tpe: Type): Class[_] =
+    mirror.runtimeClass(erasure(tpe).dealias.typeSymbol.asClass)
 
   case class Schema(dataType: DataType, nullable: Boolean)
 
@@ -864,20 +893,6 @@ trait ScalaReflection extends Logging {
   }
 
   /**
-   * Returns the full class name for a type. The returned name is the canonical
-   * Scala name, where each component is separated by a period. It is NOT the
-   * Java-equivalent runtime name (no dollar signs).
-   *
-   * In simple cases, both the Scala and Java names are the same, however when 
Scala
-   * generates constructs that do not map to a Java equivalent, such as 
singleton objects
-   * or nested classes in package objects, it uses the dollar sign ($) to 
create
-   * synthetic classes, emulating behaviour in Java bytecode.
-   */
-  def getClassNameFromType(tpe: `Type`): String = {
-    tpe.dealias.erasure.typeSymbol.asClass.fullName
-  }
-
-  /**
    * Returns the parameter names and types for the primary constructor of this 
type.
    *
    * Note that it only works for scala classes with primary constructor, and 
currently doesn't
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
index c1f1be3..66a1bbe 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
@@ -107,6 +107,8 @@ class UDTForCaseClass extends UserDefinedType[UDTCaseClass] 
{
   }
 }
 
+case class Bar(i: Any)
+case class Foo(i: Bar) extends AnyVal
 case class PrimitiveValueClass(wrapped: Int) extends AnyVal
 case class ReferenceValueClass(wrapped: ReferenceValueClass.Container) extends 
AnyVal
 object ReferenceValueClass {
@@ -311,6 +313,13 @@ class ExpressionEncoderSuite extends 
CodegenInterpretedPlanTest with AnalysisTes
 
   productTest(("UDT", new ExamplePoint(0.1, 0.2)))
 
+  test("AnyVal class with Any fields") {
+    val exception = 
intercept[UnsupportedOperationException](implicitly[ExpressionEncoder[Foo]])
+    val errorMsg = exception.getMessage
+    assert(errorMsg.contains("root class: 
\"org.apache.spark.sql.catalyst.encoders.Foo\""))
+    assert(errorMsg.contains("No Encoder found for Any"))
+  }
+
   test("nullable of encoder schema") {
     def checkNullable[T: ExpressionEncoder](nullable: Boolean*): Unit = {
       assert(implicitly[ExpressionEncoder[T]].schema.map(_.nullable) === 
nullable.toSeq)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to