Repository: spark
Updated Branches:
  refs/heads/branch-2.0 749c29bc0 -> 0858a82c1


[SPARK-15268][SQL] Make JavaTypeInference work with UDTRegistration

## What changes were proposed in this pull request?

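We have a private `UDTRegistration` API for registering user-defined types (UDTs).
Currently `JavaTypeInference` does not consult it, so `SparkSession.createDataFrame`
called with a bean class does not correctly infer the schema of a bean whose fields
use a registered UDT. This patch makes `JavaTypeInference` look up the class in
`UDTRegistration` when inferring the data type.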

## How was this patch tested?
Added a test, "JavaTypeInference with VectorUDT", to `VectorUDTSuite`.

Author: Liang-Chi Hsieh <sim...@tw.ibm.com>

Closes #13046 from viirya/fix-udt-registry-javatypeinference.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0858a82c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0858a82c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0858a82c

Branch: refs/heads/branch-2.0
Commit: 0858a82c141fe9b2d2c94a62c16657dcd6c3ec8b
Parents: 749c29b
Author: Liang-Chi Hsieh <sim...@tw.ibm.com>
Authored: Wed May 11 09:31:22 2016 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Wed May 11 09:33:55 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ml/linalg/VectorUDTSuite.scala | 16 ++++++++++++++++
 .../spark/sql/catalyst/JavaTypeInference.scala      |  5 +++++
 2 files changed, 21 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0858a82c/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
index 6d01d8f..7b50876 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
@@ -17,9 +17,19 @@
 
 package org.apache.spark.ml.linalg
 
+import scala.beans.BeanInfo
+
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.JavaTypeInference
 import org.apache.spark.sql.types._
 
+@BeanInfo
+case class LabeledPoint(label: Double, features: Vector) {
+  override def toString: String = {
+    s"($label,$features)"
+  }
+}
+
 class VectorUDTSuite extends SparkFunSuite {
 
   test("preloaded VectorUDT") {
@@ -36,4 +46,10 @@ class VectorUDTSuite extends SparkFunSuite {
       assert(udt.simpleString == "vector")
     }
   }
+
+  test("JavaTypeInference with VectorUDT") {
+    val (dataType, _) = JavaTypeInference.inferDataType(classOf[LabeledPoint])
+    assert(dataType.asInstanceOf[StructType].fields.map(_.dataType)
+      === Seq(new VectorUDT, DoubleType))
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/0858a82c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 6f9fbbb..92caf8f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -63,6 +63,11 @@ object JavaTypeInference {
       case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
         (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)
 
+      case c: Class[_] if UDTRegistration.exists(c.getName) =>
+        val udt = UDTRegistration.getUDTFor(c.getName).get.newInstance()
+          .asInstanceOf[UserDefinedType[_ >: Null]]
+        (udt, true)
+
       case c: Class[_] if c == classOf[java.lang.String] => (StringType, true)
       case c: Class[_] if c == classOf[Array[Byte]] => (BinaryType, true)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to