c21 commented on a change in pull request #33494:
URL: https://github.com/apache/spark/pull/33494#discussion_r678883030
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala
##########
@@ -396,27 +396,25 @@ class SQLMetricsSuite extends SharedSparkSession with
SQLMetricsTestUtils
}
}
- // TODO (SPARK-36272): Reenable this after we figure out why the expected
size doesn't
- // match after we adjust building's memory settings.
- ignore("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
+ test("SPARK-32629: ShuffledHashJoin(full outer) metrics") {
val uniqueLeftDf = Seq(("1", "1"), ("11", "11")).toDF("key", "value")
val nonUniqueLeftDf = Seq(("1", "1"), ("1", "2"), ("11",
"11")).toDF("key", "value")
val rightDf = (1 to 10).map(i => (i.toString, i.toString)).toDF("key2",
"value")
Seq(
// Test unique key on build side
- (uniqueLeftDf, rightDf, 11, 134228048, 10, 134221824),
+ (uniqueLeftDf, rightDf, 11, 10),
// Test non-unique key on build side
- (nonUniqueLeftDf, rightDf, 12, 134228552, 11, 134221824)
- ).foreach { case (leftDf, rightDf, fojRows, fojBuildSize, rojRows,
rojBuildSize) =>
+ (nonUniqueLeftDf, rightDf, 12, 11)
+ ).foreach { case (leftDf, rightDf, fojRows, rojRows) =>
val fojDf = leftDf.hint("shuffle_hash").join(
rightDf, $"key" === $"key2", "full_outer")
fojDf.collect()
val fojPlan = fojDf.queryExecution.executedPlan.collectFirst {
case s: ShuffledHashJoinExec => s
}
assert(fojPlan.isDefined, "The query plan should have shuffled hash
join")
- testMetricsInSparkPlanOperator(fojPlan.get,
- Map("numOutputRows" -> fojRows, "buildDataSize" -> fojBuildSize))
+ testMetricsInSparkPlanOperator(fojPlan.get, Map("numOutputRows" ->
fojRows))
+ val fojBuildSize = fojPlan.get.metrics("buildDataSize").value
// Test right outer join as well to verify build data size to be
different
// from full outer join. This makes sure we take extra BitSet/OpenHashSet
Review comment:
Well, it's actually better to verify that the build size of the full outer join is larger
than the build size of the right outer join. I can update the comment if you think it's needed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]