Repository: spark
Updated Branches:
  refs/heads/branch-1.5 f5298da16 -> e4ea2390a


[SPARK-8744] [ML] Add a public constructor to StringIndexerModel

It would be helpful to allow users to pass a pre-computed array of labels to
construct a StringIndexerModel directly, rather than always having to go
through StringIndexer to create the model.

Author: Holden Karau <hol...@pigscanfly.ca>

Closes #7267 from holdenk/SPARK-8744-StringIndexerModel-should-have-public-constructor.

(cherry picked from commit a7317ccdc20d001e5b7f5277b0535923468bfbc6)
Signed-off-by: Joseph K. Bradley <jos...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e4ea2390
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e4ea2390
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e4ea2390

Branch: refs/heads/branch-1.5
Commit: e4ea2390a5f64747dbc60febc4f3c29e1970e46d
Parents: f5298da
Author: Holden Karau <hol...@pigscanfly.ca>
Authored: Fri Aug 14 11:22:10 2015 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Fri Aug 14 11:22:19 2015 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala   | 4 +++-
 .../scala/org/apache/spark/ml/feature/StringIndexerSuite.scala   | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e4ea2390/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index b87e154..f5dfba1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -98,10 +98,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
  * This is a temporary fix for the case when target labels do not exist during prediction.
  */
 @Experimental
-class StringIndexerModel private[ml] (
+class StringIndexerModel (
     override val uid: String,
     labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {
 
+  def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels)
+
   private val labelToIndex: OpenHashMap[String, Double] = {
     val n = labels.length
     val map = new OpenHashMap[String, Double](n)

http://git-wip-us.apache.org/repos/asf/spark/blob/e4ea2390/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
index 4a12e0b..d960861 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
@@ -30,7 +30,9 @@ class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext {
   test("params") {
     ParamsSuite.checkParams(new StringIndexer)
     val model = new StringIndexerModel("indexer", Array("a", "b"))
+    val modelWithoutUid = new StringIndexerModel(Array("a", "b"))
     ParamsSuite.checkParams(model)
+    ParamsSuite.checkParams(modelWithoutUid)
   }
 
   test("StringIndexer") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to