This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8a6329870a84 [SPARK-50965][SQL][TESTS] Making sure that multiple
parameterized queries work on SparkConnect
8a6329870a84 is described below
commit 8a6329870a841f5e9bf467ffb95ee2f33168b19d
Author: viktorluc-db <[email protected]>
AuthorDate: Tue Jan 28 08:26:07 2025 -0800
[SPARK-50965][SQL][TESTS] Making sure that multiple parameterized queries
work on SparkConnect
### What changes were proposed in this pull request?
Tests only.
### Why are the changes needed?
Making sure that queries with multiple parametrization nodes in the parsed
logical plan are handled properly. Multiple parametrization nodes are produced
by taking a union of different dataframes over SparkConnect. This was not
supported previously, but
[this](https://github.com/apache/spark/pull/49442) PR added support for it, so
tests covering this feature were needed.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tests in `ClientE2ETestSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49628 from viktorluc-db/parametrization_tests.
Authored-by: viktorluc-db <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/connect/ClientE2ETestSuite.scala | 34 ++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
index 1b73f9f2f454..b376a49b4634 100644
---
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
+++
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
@@ -41,7 +41,7 @@ import
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.connect.ConnectConversions._
import org.apache.spark.sql.connect.client.{RetryPolicy, SparkConnectClient,
SparkResult}
-import org.apache.spark.sql.connect.test.{ConnectFunSuite,
IntegrationTestUtils, RemoteSparkSession, SQLHelper}
+import org.apache.spark.sql.connect.test.{ConnectFunSuite,
IntegrationTestUtils, QueryTest, RemoteSparkSession, SQLHelper}
import org.apache.spark.sql.connect.test.SparkConnectServerUtils.port
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SqlApiConf
@@ -49,7 +49,8 @@ import org.apache.spark.sql.types._
import org.apache.spark.util.SparkThreadUtils
class ClientE2ETestSuite
- extends ConnectFunSuite
+ extends QueryTest
+ with ConnectFunSuite
with RemoteSparkSession
with SQLHelper
with PrivateMethodTester {
@@ -1631,6 +1632,35 @@ class ClientE2ETestSuite
.create()
assert(sparkWithLowerMaxMessageSize.range(maxBatchSize).collect().length
== maxBatchSize)
}
+
+ test("SPARK-50965: Multiple positional parameterized nodes in the parsed
logical plan") {
+ var df = spark.sql("SELECT ?", Array(0))
+ for (i <- 1 until 3) {
+ val temp = spark.sql("SELECT ?", Array(i))
+ df = df.union(temp)
+ }
+ checkAnswer(df, (0 until 3).map(i => Row(i)))
+ }
+
+ test("SPARK-50965: Multiple named parameterized nodes in the parsed logical
plan") {
+ var df = spark.sql("SELECT :key", args = Map("key" -> 0))
+ for (i <- 1 until 3) {
+ val temp = spark.sql("SELECT :key", args = Map("key" -> i))
+ df = df.union(temp)
+ }
+ checkAnswer(df, (0 until 3).map(i => Row(i)))
+ }
+
+ test(
+ "SPARK-50965: Multiple named and positional parameterized nodes in the
parsed logical plan") {
+ var df = spark.sql("SELECT ?", Array(0))
+ df = df.union(spark.sql("SELECT :key", args = Map("key" -> 1)))
+ for (i <- 1 until 3) {
+ df = df.union(spark.sql("SELECT ?", Array(2 * i)))
+ df = df.union(spark.sql("SELECT :key", args = Map("key" -> (2 * i + 1))))
+ }
+ checkAnswer(df, (0 until 6).map(i => Row(i)))
+ }
}
private[sql] case class ClassData(a: String, b: Int)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]