This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new ab7e182e7f0 [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema ab7e182e7f0 is described below commit ab7e182e7f0c4edb6f69b80e88d65973367c0cf3 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sun Mar 5 22:07:48 2023 -0400 [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema ### What changes were proposed in this pull request? This PR aims to fix a bug in createDataFrame from a complex type schema. ### Why are the changes needed? When I wrote a UT for `DataFrameNaFunctions` as follows: val schema = new StructType() .add("c1", new StructType() .add("c1-1", StringType) .add("c1-2", StringType)) val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null)) val df = spark.createDataFrame(data.asJava, schema) df.show() I found that the above code does not work. The error message is as follows: <img width="657" alt="image" src="https://user-images.githubusercontent.com/15246973/222938339-77dec8c6-549b-41de-869b-8a191a0f745e.png"> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add a new UT. Pass GA. Closes #40280 from panbingkun/fix_local_relation. 
Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Herman van Hovell <her...@databricks.com> (cherry picked from commit 5f999b0e06da8457a39678b9063ae4a47ae10f71) Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +- .../test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 2b032b7cc8a..31a63720c5c 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -115,7 +115,7 @@ class SparkSession private[sql] ( private def createDataset[T](encoder: AgnosticEncoder[T], data: Iterator[T]): Dataset[T] = { newDataset(encoder) { builder => val localRelationBuilder = builder.getLocalRelationBuilder - .setSchema(encoder.schema.catalogString) + .setSchema(encoder.schema.json) if (data.nonEmpty) { val timeZoneId = conf.get("spark.sql.session.timeZone") val arrowData = ConvertToArrow(encoder, data, timeZoneId, allocator) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index 0397995bba6..79902e769c6 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -602,6 +602,18 @@ class ClientE2ETestSuite extends RemoteSparkSession { val newId = spark.newSession().sql("SELECT 1").analyze.getClientId assert(oldId != newId) } + + test("createDataFrame from complex type schema") { + val schema = new StructType() + .add( + "c1", + new StructType() + .add("c1-1", StringType) + .add("c1-2", StringType)) + val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null)) + val result = spark.createDataFrame(data.asJava, schema).collect() + assert(result === data) + } } private[sql] case class MyType(id: Long, a: Double, b: Double) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org