Repository: spark Updated Branches: refs/heads/master 7297ae04d -> b24d3dba6
[SPARK-24290][ML] add support for Array input for instrumentation.logNamedValue ## What changes were proposed in this pull request? Extend instrumentation.logNamedValue to support Array input, and change the logging for "clusterSizes" to use the new method. ## How was this patch tested? N/A Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Lu WANG <lu.w...@databricks.com> Closes #21347 from ludatabricks/SPARK-24290. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b24d3dba Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b24d3dba Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b24d3dba Branch: refs/heads/master Commit: b24d3dba6571fd3c9e2649aceeaadc3f9c6cc90f Parents: 7297ae0 Author: Lu WANG <lu.w...@databricks.com> Authored: Mon Jun 4 14:54:31 2018 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Mon Jun 4 14:54:31 2018 -0700 ---------------------------------------------------------------------- .../apache/spark/ml/clustering/BisectingKMeans.scala | 3 +-- .../apache/spark/ml/clustering/GaussianMixture.scala | 3 +-- .../scala/org/apache/spark/ml/clustering/KMeans.scala | 3 +-- .../org/apache/spark/ml/util/Instrumentation.scala | 13 +++++++++++++ 4 files changed, 16 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b24d3dba/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 1ad4e09..9c96145 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -276,8 +276,7 @@ class 
BisectingKMeans @Since("2.0.0") ( val summary = new BisectingKMeansSummary( model.transform(dataset), $(predictionCol), $(featuresCol), $(k)) model.setSummary(Some(summary)) - // TODO: need to extend logNamedValue to support Array - instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]")) + instr.logNamedValue("clusterSizes", summary.clusterSizes) instr.logSuccess(model) model } http://git-wip-us.apache.org/repos/asf/spark/blob/b24d3dba/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 3091bb5..64ecc1e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -426,8 +426,7 @@ class GaussianMixture @Since("2.0.0") ( $(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood) model.setSummary(Some(summary)) instr.logNamedValue("logLikelihood", logLikelihood) - // TODO: need to extend logNamedValue to support Array - instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]")) + instr.logNamedValue("clusterSizes", summary.clusterSizes) instr.logSuccess(model) model } http://git-wip-us.apache.org/repos/asf/spark/blob/b24d3dba/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index e72d7f9..1704412 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -359,8 +359,7 @@ class KMeans @Since("1.5.0") ( 
model.transform(dataset), $(predictionCol), $(featuresCol), $(k)) model.setSummary(Some(summary)) - // TODO: need to extend logNamedValue to support Array - instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]")) + instr.logNamedValue("clusterSizes", summary.clusterSizes) instr.logSuccess(model) if (handlePersistence) { instances.unpersist() http://git-wip-us.apache.org/repos/asf/spark/blob/b24d3dba/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala index 467130b..3a1c166 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala @@ -132,6 +132,19 @@ private[spark] class Instrumentation[E <: Estimator[_]] private ( log(compact(render(name -> value))) } + def logNamedValue(name: String, value: Array[String]): Unit = { + log(compact(render(name -> compact(render(value.toSeq))))) + } + + def logNamedValue(name: String, value: Array[Long]): Unit = { + log(compact(render(name -> compact(render(value.toSeq))))) + } + + def logNamedValue(name: String, value: Array[Double]): Unit = { + log(compact(render(name -> compact(render(value.toSeq))))) + } + + /** * Logs the successful completion of the training session. */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org