Repository: spark Updated Branches: refs/heads/master a21791e31 -> 2c7394ad0
[SPARK-18019][ML] Add instrumentation to GBTs ## What changes were proposed in this pull request? Add instrumentation for logging in ML GBT, part of umbrella ticket [SPARK-14567](https://issues.apache.org/jira/browse/SPARK-14567) ## How was this patch tested? Tested locally: ```` 16/10/20 10:24:51 INFO Instrumentation: GBTRegressor-gbtr_2b460d3e2e93-1207021668-45: training: numPartitions=1 storageLevel=StorageLevel(1 replicas) 16/10/20 10:24:51 INFO Instrumentation: GBTRegressor-gbtr_2b460d3e2e93-1207021668-45: {"maxIter":1} 16/10/20 10:24:51 INFO Instrumentation: GBTRegressor-gbtr_2b460d3e2e93-1207021668-45: {"numFeatures":2} 16/10/20 10:24:51 INFO Instrumentation: GBTRegressor-gbtr_2b460d3e2e93-1207021668-45: {"numClasses":0} ... 16/10/20 15:54:21 INFO Instrumentation: GBTRegressor-gbtr_065fad465377-1922077832-22: training finished ```` Author: sethah <[email protected]> Closes #15574 from sethah/gbt_instr. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c7394ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c7394ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c7394ad Branch: refs/heads/master Commit: 2c7394ad096201cd721be7f532da9d97028cc577 Parents: a21791e Author: sethah <[email protected]> Authored: Tue Oct 25 13:11:21 2016 -0700 Committer: Joseph K. Bradley <[email protected]>
Committed: Tue Oct 25 13:11:21 2016 -0700 ---------------------------------------------------------------------- .../apache/spark/ml/classification/GBTClassifier.scala | 10 +++++++++- .../org/apache/spark/ml/regression/GBTRegressor.scala | 9 ++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2c7394ad/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index ba70293..8bffe0c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -137,9 +137,17 @@ class GBTClassifier @Since("1.4.0") ( } val numFeatures = oldDataset.first().features.size val boostingStrategy = super.getOldBoostingStrategy(categoricalFeatures, OldAlgo.Classification) + + val instr = Instrumentation.create(this, oldDataset) + instr.logParams(params: _*) + instr.logNumFeatures(numFeatures) + instr.logNumClasses(2) + val (baseLearners, learnerWeights) = GradientBoostedTrees.run(oldDataset, boostingStrategy, $(seed)) - new GBTClassificationModel(uid, baseLearners, learnerWeights, numFeatures) + val m = new GBTClassificationModel(uid, baseLearners, learnerWeights, numFeatures) + instr.logSuccess(m) + m } @Since("1.4.1") http://git-wip-us.apache.org/repos/asf/spark/blob/2c7394ad/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index bb01f9d..fa69d60 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -123,9 +123,16 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String) val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val numFeatures = oldDataset.first().features.size val boostingStrategy = super.getOldBoostingStrategy(categoricalFeatures, OldAlgo.Regression) + + val instr = Instrumentation.create(this, oldDataset) + instr.logParams(params: _*) + instr.logNumFeatures(numFeatures) + val (baseLearners, learnerWeights) = GradientBoostedTrees.run(oldDataset, boostingStrategy, $(seed)) - new GBTRegressionModel(uid, baseLearners, learnerWeights, numFeatures) + val m = new GBTRegressionModel(uid, baseLearners, learnerWeights, numFeatures) + instr.logSuccess(m) + m } @Since("1.4.0") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
