This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new ab7e182e7f0 [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema ab7e182e7f0 is described below commit ab7e182e7f0c4edb6f69b80e88d65973367c0cf3 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sun Mar 5 22:07:48 2023 -0400 [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema ### What changes were proposed in this pull request? This PR aims to fix a bug in createDataFrame from a complex type schema. ### Why are the changes needed? When I wrote a UT for `DataFrameNaFunctions` as follows: val schema = new StructType() .add("c1", new StructType() .add("c1-1", StringType) .add("c1-2", StringType)) val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null)) val df = spark.createDataFrame(data.asJava, schema) df.show() I found that the above code does not work. The error message is as follows: <img width="657" alt="image" src="https://user-images.githubusercontent.com/15246973/222938339-77dec8c6-549b-41de-869b-8a191a0f745e.png"> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add a new UT. Pass GA. Closes #40280 from panbingkun/fix_local_relation. 
Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Herman van Hovell <her...@databricks.com> (cherry picked from commit 5f999b0e06da8457a39678b9063ae4a47ae10f71) Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +- .../test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 2b032b7cc8a..31a63720c5c 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -115,7 +115,7 @@ class SparkSession private[sql] ( private def createDataset[T](encoder: AgnosticEncoder[T], data: Iterator[T]): Dataset[T] = { newDataset(encoder) { builder => val localRelationBuilder = builder.getLocalRelationBuilder - .setSchema(encoder.schema.catalogString) + .setSchema(encoder.schema.json) if (data.nonEmpty) { val timeZoneId = conf.get("spark.sql.session.timeZone") val arrowData = ConvertToArrow(encoder, data, timeZoneId, allocator) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index 0397995bba6..79902e769c6 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -602,6 +602,18 @@ class ClientE2ETestSuite extends RemoteSparkSession { val newId = spark.newSession().sql("SELECT 1").analyze.getClientId assert(oldId != newId) } + + test("createDataFrame from complex type schema") { + val schema = new StructType() + .add( + "c1", + new StructType() + .add("c1-1", StringType) + .add("c1-2", StringType)) + val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null)) + val result = spark.createDataFrame(data.asJava, schema).collect() + assert(result === data) + } } private[sql] case class MyType(id: Long, a: Double, b: Double) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org