This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 59977a84257e [SPARK-54720][SQL] Add SparkSession.emptyDataFrame with a schema
59977a84257e is described below
commit 59977a84257e3009eff856e06b60e6eb0890b97a
Author: Herman van Hövell <[email protected]>
AuthorDate: Fri Dec 19 10:03:27 2025 -0400
[SPARK-54720][SQL] Add SparkSession.emptyDataFrame with a schema
### What changes were proposed in this pull request?
This PR adds a version of `SparkSession.emptyDataFrame` that takes a schema.
### Why are the changes needed?
It makes it easier to create an empty DataFrame in Scala.
### Does this PR introduce _any_ user-facing change?
Yes, it adds a new API.
### How was this patch tested?
I have added a test case.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53489 from hvanhovell/SPARK-54720.
Authored-by: Herman van Hövell <[email protected]>
Signed-off-by: Herman van Hövell <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/SparkSession.scala | 8 +++++++-
.../SparkSessionBuilderImplementationBindingSuite.scala | 14 ++++++++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 8e7ae51e998f..9c6e3bdb9078 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -210,9 +210,15 @@ abstract class SparkSession extends Serializable with Closeable {
*
* @since 2.0.0
*/
- @transient
def emptyDataFrame: DataFrame
+ /**
+ * Returns a `DataFrame` with schema `schema` and no rows.
+ *
+ * @since 4.2.0
+ */
+  def emptyDataFrame(schema: StructType): DataFrame =
+    emptyDataset(Encoders.row(schema))
+
/**
* Creates a `DataFrame` from a local Seq of Product.
*
diff --git a/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala b/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
index 57eddd1bc69f..95c82cbdbdb6 100644
--- a/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
+++ b/sql/api/src/test/scala/org/apache/spark/sql/SparkSessionBuilderImplementationBindingSuite.scala
@@ -22,6 +22,7 @@ import org.scalatest.funsuite.AnyFunSuite
import org.apache.spark.SparkContext
import org.apache.spark.sql.functions.{max, sum}
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
/**
* Test suite for SparkSession implementation binding.
@@ -70,4 +71,17 @@ trait SparkSessionBuilderImplementationBindingSuite
val df = ctx.createDataset(1 to 11).select(max("value").as[Long])
assert(df.head() == 11)
}
+
+ test("emptyDataFrame with Schema") {
+ val session = SparkSession.builder().getOrCreate()
+ val schema =
+      new StructType(Array(StructField("a", IntegerType), StructField("b", StringType)))
+ val df = session.emptyDataFrame(schema)
+ assert(df.schema == schema)
+ assert(df.isEmpty)
+ val derivedSchema = new StructType(Array(StructField("a", IntegerType)))
+ val derivedDf = df.select("a")
+ assert(derivedDf.schema == derivedSchema)
+ assert(derivedDf.isEmpty)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]