GitHub user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/21719#discussion_r200497861
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala ---
@@ -35,32 +36,47 @@ import org.apache.spark.util.Utils
/**
* A small wrapper that defines a training session for an estimator, and some methods to log
* useful information during this session.
- *
- * A new instance is expected to be created within fit().
- *
- * @param estimator the estimator that is being fit
- * @param dataset the training dataset
- * @tparam E the type of the estimator
*/
-private[spark] class Instrumentation[E <: Estimator[_]] private (
- val estimator: E,
- val dataset: RDD[_]) extends Logging {
+private[spark] class Instrumentation extends Logging {
private val id = UUID.randomUUID()
- private val prefix = {
- // estimator.getClass.getSimpleName can cause Malformed class name error,
- // call safer `Utils.getSimpleName` instead
- val className = Utils.getSimpleName(estimator.getClass)
- s"$className-${estimator.uid}-${dataset.hashCode()}-$id: "
+ private val shortId = id.toString.take(8)
+ private var prefix = s"$shortId:"
+
+ // TODO: update spark.ml to use new Instrumentation APIs and remove this constructor
+ var estimator: Estimator[_] = _
+ private def this(estimator: Estimator[_], dataset: RDD[_]) = {
+ this()
+ logContext(estimator, dataset)
}
- init()
+ /**
+ * Log info about the estimator and dataset being fit.
+ *
+ * @param estimator the estimator that is being fit
+ * @param dataset the training dataset
+ */
+ def logContext(estimator: Estimator[_], dataset: RDD[_]): Unit = {
--- End diff --
see my comment above
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]