Github user MLnick commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18118#discussion_r149760874

    --- Diff: mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala ---
    @@ -166,6 +166,40 @@ class GBTRegressorSuite extends SparkFunSuite with MLlibTestSparkContext
       }
       /////////////////////////////////////////////////////////////////////////////
    +  // Tests of feature subset strategy
    +  /////////////////////////////////////////////////////////////////////////////
    +  test("Tests of feature subset strategy") {
    +    val numClasses = 2
    +    val gbt = new GBTRegressor()
    +      .setMaxDepth(3)
    +      .setMaxIter(5)
    +      .setSubsamplingRate(1.0)
    +      .setStepSize(0.5)
    +      .setSeed(123)
    +      .setFeatureSubsetStrategy("all")
    +
    +    // In this data, feature 1 is very important.
    +    val data: RDD[LabeledPoint] = TreeTests.featureImportanceData(sc)
    +    val categoricalFeatures = Map.empty[Int, Int]
    +    val df: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses)
    +
    +    val importances = gbt.fit(df).featureImportances
    +    val mostImportantFeature = importances.argmax
    +    assert(mostImportantFeature === 1)
    +    assert(importances.toArray.sum === 1.0)
    --- End diff --

    You've kept the other assertions in this test (that were removed from the classifier test as per https://github.com/apache/spark/pull/18118#discussion_r148095940). If they're not necessary, we should also remove them here.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org