seancxmao commented on a change in pull request #23258: 
[SPARK-23375][SQL][FOLLOWUP][TEST] Test Sort metrics while Sort is missing
URL: https://github.com/apache/spark/pull/23258#discussion_r244299262
 
 

 ##########
 File path: 
sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala
 ##########
 @@ -198,6 +199,52 @@ trait SQLMetricsTestUtils extends SQLTestUtils {
       }
     }
   }
+
+  private def metricStats(metricStr: String): Seq[String] = { // splits "sum (min, med, max)"
+    val sum = metricStr.substring(0, 
metricStr.indexOf("(")).stripPrefix("\n").stripSuffix(" ") // text before "("
+    val minMedMax = metricStr.substring(metricStr.indexOf("(") + 1, 
metricStr.indexOf(")"))
+      .split(", ").toSeq // the comma-separated entries between "(" and ")"
+    (sum +: minMedMax) // sum first, followed by the parenthesized entries
+  }
+
+  private def stringToBytes(str: String): (Float, String) = { // "<number> <unit>" -> tuple
+    val matcher =
+      Pattern.compile("([0-9]+(\\.[0-9]+)?) 
(EiB|PiB|TiB|GiB|MiB|KiB|B)").matcher(str)
+    if (matcher.matches()) { // matches() requires the whole string to fit the pattern
+      (matcher.group(1).toFloat, matcher.group(3)) // group 1 = number, group 3 = unit
+    } else {
+      throw new NumberFormatException("Failed to parse byte string: " + str)
+    }
+  }
+
+  private def stringToDuration(str: String): (Float, String) = { // "<number> <unit>" -> tuple
+    val matcher = Pattern.compile("([0-9]+(\\.[0-9]+)?) 
(ms|s|m|h)").matcher(str)
+    if (matcher.matches()) { // matches() requires the whole string to fit the pattern
+      (matcher.group(1).toFloat, matcher.group(3)) // group 1 = number, group 3 = unit
+    } else {
+      throw new NumberFormatException("Failed to parse time string: " + str)
+    }
+  }
+
+  /**
+   * Convert a size metric string to a sequence of stats, including sum, min, med and max in
+   * order, each a tuple of (value, unit).
+   * @param metricStr size metric string, e.g. "\n96.2 MiB (32.1 MiB, 32.1 MiB, 32.1 MiB)"
+   * @return A sequence of stats, e.g. ((96.2,MiB), (32.1,MiB), (32.1,MiB), (32.1,MiB))
+   * @throws NumberFormatException if any stat is not of the form "number unit", where unit is
+   *                               one of EiB, PiB, TiB, GiB, MiB, KiB or B
+   */
+  protected def sizeMetricStats(metricStr: String): Seq[(Float, String)] = {
+    metricStats(metricStr).map(stringToBytes)
+  }
+
+  /**
+   * Convert a timing metric string to a sequence of stats, including sum, min, med and max in
+   * order, each a tuple of (value, unit).
+   * @param metricStr timing metric string, e.g. "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms)"
+   * @return A sequence of stats, e.g. ((2.0,ms), (1.0,ms), (1.0,ms), (1.0,ms))
+   * @throws NumberFormatException if any stat is not of the form "number unit", where unit is
+   *                               one of ms, s, m or h
+   */
+  protected def timingMetricStats(metricStr: String): Seq[(Float, String)] = {
+    metricStats(metricStr).map(stringToDuration)
+  }
 
 Review comment:
   In a new commit, I have added 
`SQLMetricsTestUtils#testSparkPlanMetricsWithPredicates`. This way, we only 
need to provide a test spec in `test("Sort metrics")`, which makes the test 
case declarative rather than procedural.
   
   To simplify timing and size metric testing, I added 2 common predicates, 
`timingMetricAllStatsShould` and `sizeMetricAllStatsShould`. These could be 
used for other metrics as long as they are timing or size metrics.
   
   I also modified the original `testSparkPlanMetrics` to make it a special 
case of `testSparkPlanMetricsWithPredicates`, where each expected metric value 
is converted to an equality predicate. This eliminates the duplicated code, 
since `testSparkPlanMetrics` and `testSparkPlanMetricsWithPredicates` were 
almost the same.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to