Repository: spark Updated Branches: refs/heads/branch-1.5 f5298da16 -> e4ea2390a
[SPARK-8744] [ML] Add a public constructor to StringIndexer It would be helpful to allow users to pass a pre-computed index to create an indexer, rather than always going through StringIndexer to create the model. Author: Holden Karau <hol...@pigscanfly.ca> Closes #7267 from holdenk/SPARK-8744-StringIndexerModel-should-have-public-constructor. (cherry picked from commit a7317ccdc20d001e5b7f5277b0535923468bfbc6) Signed-off-by: Joseph K. Bradley <jos...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e4ea2390 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e4ea2390 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e4ea2390 Branch: refs/heads/branch-1.5 Commit: e4ea2390a5f64747dbc60febc4f3c29e1970e46d Parents: f5298da Author: Holden Karau <hol...@pigscanfly.ca> Authored: Fri Aug 14 11:22:10 2015 -0700 Committer: Joseph K. Bradley <jos...@databricks.com> Committed: Fri Aug 14 11:22:19 2015 -0700 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 4 +++- .../scala/org/apache/spark/ml/feature/StringIndexerSuite.scala | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e4ea2390/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index b87e154..f5dfba1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -98,10 +98,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod * This is a temporary fix for the case when target labels do not exist during prediction. */ @Experimental -class StringIndexerModel private[ml] ( +class StringIndexerModel ( override val uid: String, labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase { + def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels) + private val labelToIndex: OpenHashMap[String, Double] = { val n = labels.length val map = new OpenHashMap[String, Double](n) http://git-wip-us.apache.org/repos/asf/spark/blob/e4ea2390/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index 4a12e0b..d960861 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -30,7 +30,9 @@ class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext { test("params") { ParamsSuite.checkParams(new StringIndexer) val model = new StringIndexerModel("indexer", Array("a", "b")) + val modelWithoutUid = new StringIndexerModel(Array("a", "b")) ParamsSuite.checkParams(model) + ParamsSuite.checkParams(modelWithoutUid) } test("StringIndexer") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org