seancxmao commented on a change in pull request #23258:
[SPARK-23375][SQL][FOLLOWUP][TEST] Test Sort metrics while Sort is missing
URL: https://github.com/apache/spark/pull/23258#discussion_r244266394
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala
##########
@@ -198,6 +199,52 @@ trait SQLMetricsTestUtils extends SQLTestUtils {
}
}
}
+
+ private def metricStats(metricStr: String): Seq[String] = {
+ val sum = metricStr.substring(0,
metricStr.indexOf("(")).stripPrefix("\n").stripSuffix(" ")
+ val minMedMax = metricStr.substring(metricStr.indexOf("(") + 1,
metricStr.indexOf(")"))
+ .split(", ").toSeq
+ (sum +: minMedMax)
+ }
+
+ private def stringToBytes(str: String): (Float, String) = {
+ val matcher =
+ Pattern.compile("([0-9]+(\\.[0-9]+)?)
(EiB|PiB|TiB|GiB|MiB|KiB|B)").matcher(str)
+ if (matcher.matches()) {
+ (matcher.group(1).toFloat, matcher.group(3))
+ } else {
+ throw new NumberFormatException("Failed to parse byte string: " + str)
+ }
+ }
+
+ private def stringToDuration(str: String): (Float, String) = {
+ val matcher = Pattern.compile("([0-9]+(\\.[0-9]+)?)
(ms|s|m|h)").matcher(str)
+ if (matcher.matches()) {
+ (matcher.group(1).toFloat, matcher.group(3))
+ } else {
+ throw new NumberFormatException("Failed to parse time string: " + str)
+ }
+ }
+
+ /**
+ * Convert a size metric string to a sequence of stats, including sum, min,
med and max in order,
+ * each a tuple of (value, unit).
+ * @param metricStr size metric string, e.g. "\n96.2 MB (32.1 MB, 32.1 MB,
32.1 MB)"
+ * @return A sequence of stats, e.g. ((96.2,MB), (32.1,MB), (32.1,MB),
(32.1,MB))
+ */
+ protected def sizeMetricStats(metricStr: String): Seq[(Float, String)] = {
+ metricStats(metricStr).map(stringToBytes)
+ }
+
+ /**
+ * Convert a timing metric string to a sequence of stats, including sum,
min, med and max in
+ * order, each a tuple of (value, unit).
+ * @param metricStr timing metric string, e.g. "\n2.0 ms (1.0 ms, 1.0 ms,
1.0 ms)"
+ * @return A sequence of stats, e.g. ((2.0,ms), (1.0,ms), (1.0,ms), (1.0,ms))
+ */
+ protected def timingMetricStats(metricStr: String): Seq[(Float, String)] = {
+ metricStats(metricStr).map(stringToDuration)
+ }
Review comment:
As for checking metrics, checking ">= 0" is better than just checking
whether it is defined, because a size or timing SQLMetric could be initialized
with a non-zero value, e.g. -1.
https://github.com/apache/spark/blob/1e55f31e382cf67fd38ea8001d0b1d6b3bdcc586/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala#L109-L125
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]