dongjoon-hyun commented on code in PR #38950:
URL: https://github.com/apache/spark/pull/38950#discussion_r1042722698
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala:
##########
@@ -404,8 +409,103 @@ class KeyGroupedPartitioningSuite extends
DistributionAndOrderingSuiteBase {
s"FROM testcat.ns.$items i JOIN testcat.ns.$purchases p " +
"ON i.id = p.item_id AND i.arrive_time = p.time ORDER BY id,
purchase_price, sale_price")
+ checkAnswer(df,
+ Seq(Row(1, "aa", 40.0, 42.0), Row(2, "bb", 10.0, 11.0)))
+
+ val shuffles = collectShuffles(df.queryExecution.executedPlan)
+ assert(shuffles.isEmpty, "should not add shuffle when partition keys
mismatch")
+ }
+
+ test("partitioned join: partition values from one side are subset of those
from the other side") {
+ val items_partitions = Array(bucket(4, "id"))
+ createTable(items, items_schema, items_partitions)
+
+ sql(s"INSERT INTO testcat.ns.$items VALUES " +
+ s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " +
Review Comment:
   nit. `s"` -> `"` (the `s` interpolator is unnecessary here since the string contains no interpolated expressions)
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala:
##########
@@ -404,8 +409,103 @@ class KeyGroupedPartitioningSuite extends
DistributionAndOrderingSuiteBase {
s"FROM testcat.ns.$items i JOIN testcat.ns.$purchases p " +
"ON i.id = p.item_id AND i.arrive_time = p.time ORDER BY id,
purchase_price, sale_price")
+ checkAnswer(df,
+ Seq(Row(1, "aa", 40.0, 42.0), Row(2, "bb", 10.0, 11.0)))
+
+ val shuffles = collectShuffles(df.queryExecution.executedPlan)
+ assert(shuffles.isEmpty, "should not add shuffle when partition keys
mismatch")
+ }
+
+ test("partitioned join: partition values from one side are subset of those
from the other side") {
+ val items_partitions = Array(bucket(4, "id"))
+ createTable(items, items_schema, items_partitions)
+
+ sql(s"INSERT INTO testcat.ns.$items VALUES " +
+ s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " +
+ s"(3, 'bb', 10.0, cast('2020-01-01' as timestamp)), " +
+ s"(4, 'cc', 15.5, cast('2020-02-01' as timestamp))")
+
+ val purchases_partitions = Array(bucket(4, "item_id"))
+ createTable(purchases, purchases_schema, purchases_partitions)
+
+ sql(s"INSERT INTO testcat.ns.$purchases VALUES " +
+ s"(1, 42.0, cast('2020-01-01' as timestamp)), " +
Review Comment:
   ditto — same nit: drop the unnecessary `s` interpolator on this literal
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]