This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8a6329870a84 [SPARK-50965][SQL][TESTS] Making sure that multiple
parameterized queries work on SparkConnect
8a6329870a84 is described below
commit 8a6329870a841f5e9bf467ffb95ee2f33168b19d
Author: viktorluc-db <[email protected]>
AuthorDate: Tue Jan 28 08:26:07 2025 -0800
[SPARK-50965][SQL][TESTS] Making sure that multiple parameterized queries
work on SparkConnect
### What changes were proposed in this pull request?
Tests only.
### Why are the changes needed?
Making sure that queries with multiple parametrization nodes in the parsed
logical plan are handled properly. Multiple parametrization nodes are produced
by taking a union of different dataframes over SparkConnect. This was not
supported previously, but
[this](https://github.com/apache/spark/pull/49442) PR added support for it, so
tests covering this feature were needed.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tests in `ClientE2ETestSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49628 from viktorluc-db/parametrization_tests.
Authored-by: viktorluc-db <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/connect/ClientE2ETestSuite.scala | 34 ++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
index 1b73f9f2f454..b376a49b4634 100644
---
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
+++
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala
@@ -41,7 +41,7 @@ import
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.connect.ConnectConversions._
import org.apache.spark.sql.connect.client.{RetryPolicy, SparkConnectClient,
SparkResult}
-import org.apache.spark.sql.connect.test.{ConnectFunSuite,
IntegrationTestUtils, RemoteSparkSession, SQLHelper}
+import org.apache.spark.sql.connect.test.{ConnectFunSuite,
IntegrationTestUtils, QueryTest, RemoteSparkSession, SQLHelper}
import org.apache.spark.sql.connect.test.SparkConnectServerUtils.port
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SqlApiConf
@@ -49,7 +49,8 @@ import org.apache.spark.sql.types._
import org.apache.spark.util.SparkThreadUtils
class ClientE2ETestSuite
- extends ConnectFunSuite
+ extends QueryTest
+ with ConnectFunSuite
with RemoteSparkSession
with SQLHelper
with PrivateMethodTester {
@@ -1631,6 +1632,35 @@ class ClientE2ETestSuite
.create()
assert(sparkWithLowerMaxMessageSize.range(maxBatchSize).collect().length
== maxBatchSize)
}
+
+ test("SPARK-50965: Multiple positional parameterized nodes in the parsed
logical plan") {
+ var df = spark.sql("SELECT ?", Array(0))
+ for (i <- 1 until 3) {
+ val temp = spark.sql("SELECT ?", Array(i))
+ df = df.union(temp)
+ }
+ checkAnswer(df, (0 until 3).map(i => Row(i)))
+ }
+
+ test("SPARK-50965: Multiple named parameterized nodes in the parsed logical
plan") {
+ var df = spark.sql("SELECT :key", args = Map("key" -> 0))
+ for (i <- 1 until 3) {
+ val temp = spark.sql("SELECT :key", args = Map("key" -> i))
+ df = df.union(temp)
+ }
+ checkAnswer(df, (0 until 3).map(i => Row(i)))
+ }
+
+ test(
+ "SPARK-50965: Multiple named and positional parameterized nodes in the
parsed logical plan") {
+ var df = spark.sql("SELECT ?", Array(0))
+ df = df.union(spark.sql("SELECT :key", args = Map("key" -> 1)))
+ for (i <- 1 until 3) {
+ df = df.union(spark.sql("SELECT ?", Array(2 * i)))
+ df = df.union(spark.sql("SELECT :key", args = Map("key" -> (2 * i + 1))))
+ }
+ checkAnswer(df, (0 until 6).map(i => Row(i)))
+ }
}
private[sql] case class ClassData(a: String, b: Int)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]