szehon-ho commented on code in PR #47126:
URL: https://github.com/apache/spark/pull/47126#discussion_r1805444727
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala:
##########
@@ -1720,6 +1720,57 @@ class KeyGroupedPartitioningSuite extends
DistributionAndOrderingSuiteBase {
}
}
+
+ test("SPARK-47094: SPJ: Does not trigger when incompatible number of buckets
on both side") {
+ val table1 = "tab1e1"
+ val table2 = "table2"
+
+ Seq(
+ (2, 3),
+ (3, 4)
+ ).foreach {
+ case (table1buckets1, table2buckets1) =>
+ catalog.clearTables()
+
+ val partition1 = Array(bucket(table1buckets1, "store_id"))
+ val partition2 = Array(bucket(table2buckets1, "store_id"))
+
+ Seq((table1, partition1), (table2, partition2)).foreach { case (tab,
part) =>
+ createTable(tab, columns2, part)
+ val insertStr = s"INSERT INTO testcat.ns.$tab VALUES " +
+ "(0, 0, 'aa'), " +
+ "(1, 0, 'ab'), " + // duplicate partition key
+ "(2, 2, 'ac'), " +
+ "(3, 3, 'ad'), " +
+ "(4, 2, 'bc') "
+
+ sql(insertStr)
+ }
+
+ Seq(true, false).foreach { allowJoinKeysSubsetOfPartitionKeys =>
+ withSQLConf(
+ SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false",
+ SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> "true",
+ SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key
-> "false",
+ SQLConf.V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS.key
->
+ allowJoinKeysSubsetOfPartitionKeys.toString,
+ SQLConf.V2_BUCKETING_ALLOW_COMPATIBLE_TRANSFORMS.key -> "true") {
+ val df = sql(
+ s"""
+ |${selectWithMergeJoinHint("t1", "t2")}
+ |t1.store_id, t1.dept_id, t1.data, t2.data
+ |FROM testcat.ns.$table1 t1 JOIN testcat.ns.$table2 t2
+ |ON t1.store_id = t2.store_id AND t1.dept_id = t2.dept_id
+ |ORDER BY t1.store_id, t1.dept_id, t1.data, t2.data
Review Comment:
Nit: no need for ORDER BY in this test; it's only needed when checking the
result.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]